Compare commits

...

13 Commits

Author SHA1 Message Date
Ian Fijolek
7d87c3d036 Add default values for AlertEvery
There is also a test error corrected in TestMonitorFailureAlertEvery
where the same test conditions were repeated twice.
2022-12-19 15:49:32 -08:00
Ian Fijolek
deec04bf0d Allow setting of global defaults for some values
This helps with reducing redundant config.

Note: There is no default for `alert_every` because the zero value has a
meaning and cannot be interpreted as an omission.
2022-12-19 15:49:32 -08:00
Ian Fijolek
958446050f Update linters 2022-12-19 15:34:47 -08:00
Ian Fijolek
88e94642d9 Remove some hooks included in golangci-lint and upgrade existing 2022-06-07 21:39:18 -07:00
Ian Fijolek
bc83a51907 Switch pre-commit url for golang 2022-04-04 20:12:01 -07:00
Ian Fijolek
08b8932331 Update curl version 2022-01-24 16:08:18 -08:00
Ian Fijolek
9072d97bb8 Make linters happy 2022-01-24 10:39:53 -08:00
Ian Fijolek
cdd8a69669 Update go version 2021-12-01 14:47:58 -08:00
Ian Fijolek
3c14a02770 Continue checking all monitors after sending alert
Previously this was mistakenly returning after sending an alert. Now
all alerts will be sent unless there is an exception on one of them.
2021-09-02 10:20:04 -07:00
Ian Fijolek
328ea83c25 Some linting cleanup 2021-09-02 10:19:03 -07:00
Ian Fijolek
ce986e8d1d Roll back to alpine:3.12
Looks like there is a clock issue with raspbian

https://wiki.alpinelinux.org/wiki/Release_Notes_for_Alpine_3.13.0#time64_requirements
2021-05-12 19:06:41 -07:00
Ian Fijolek
31a4b484bf Merge branch 'duration-intervals' 2021-05-12 18:32:12 -07:00
Ian Fijolek
444d060736 Remove qemu-user-static from Dockerfile and update alpine
My build machine now has proper qemu support added, so this is not needed
2021-05-12 23:22:24 +00:00
13 changed files with 118 additions and 86 deletions
+2 -2
View File
@@ -4,7 +4,7 @@ name: test
steps:
- name: test
image: golang:1.15
image: golang:1.17
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
@@ -30,7 +30,7 @@ trigger:
steps:
- name: build all binaries
image: golang:1.15
image: golang:1.17
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
+13 -25
View File
@@ -1,32 +1,22 @@
---
linters:
enable:
- asciicheck
- bodyclose
- dogsled
- dupl
- errname
- errorlint
- exhaustive
- gochecknoinits
- gocognit
- gocritic
- gocyclo
- goerr113
- gofumpt
- goimports
- gomnd
- goprintffuncname
# - gosec
# - ifshort
- interfacer
- maligned
- misspell
- nakedret
- nestif
- nlreturn
- noctx
- unparam
- tagliatelle
- tenv
- testpackage
- thelper
- tparallel
- unconvert
- wrapcheck
- wsl
# - errorlint
disable:
- gochecknoglobals
@@ -34,15 +24,13 @@ linters-settings:
gosec:
excludes:
- G204
# gomnd:
# settings:
# mnd:
# ignored-functions: math.*
tagliatelle:
case:
rules:
yaml: snake
issues:
exclude-rules:
- path: _test\.go
linters:
- errcheck
- gosec
- maligned
+4 -6
View File
@@ -1,7 +1,7 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.4.0
hooks:
- id: check-added-large-files
- id: check-yaml
@@ -10,13 +10,11 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-merge-conflict
- repo: git://github.com/dnephin/pre-commit-golang
rev: v0.4.0
- repo: https://github.com/golangci/golangci-lint
rev: v1.50.1
hooks:
- id: go-fmt
- id: go-imports
- id: golangci-lint
- repo: https://github.com/hadolint/hadolint
rev: v2.4.0
rev: v2.12.1-beta
hooks:
- id: hadolint
+2 -6
View File
@@ -1,15 +1,11 @@
ARG REPO=library
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
FROM ${REPO}/alpine:3.10
# Copying all qemu files because amd64 doesn't exist and cannot conditionally copy
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
FROM ${REPO}/alpine:3.12
RUN mkdir /app
WORKDIR /app/
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+3 -5
View File
@@ -1,7 +1,5 @@
ARG REPO=library
FROM golang:1.12-alpine AS builder
RUN apk add --no-cache git=~2
FROM golang:1.17 AS builder
RUN mkdir /app
WORKDIR /app
@@ -16,7 +14,7 @@ ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM ${REPO}/alpine:3.10
FROM ${REPO}/alpine:3.12
RUN mkdir /app
WORKDIR /app/
@@ -24,7 +22,7 @@ WORKDIR /app/
COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+3
View File
@@ -55,6 +55,9 @@ The global configurations are:
|key|value|
|---|---|
|`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|`monitors`|List of all monitors. Detailed description below|
|`alerts`|List of all alerts. Detailed description below|
+26 -3
View File
@@ -13,9 +13,13 @@ var errInvalidConfig = errors.New("Invalid configuration")
// Config type contains all provided user configuration
type Config struct {
CheckInterval SecondsOrDuration `yaml:"check_interval"`
Monitors []*Monitor
Alerts map[string]*Alert
CheckInterval SecondsOrDuration `yaml:"check_interval"`
DefaultAlertAfter int16 `yaml:"default_alert_after"`
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
DefaultAlertDown []string `yaml:"default_alert_down"`
DefaultAlertUp []string `yaml:"default_alert_up"`
Monitors []*Monitor
Alerts map[string]*Alert
}
// CommandOrShell type wraps a string or list of strings
@@ -135,8 +139,27 @@ func (config Config) IsValid() (isValid bool) {
// Init performs extra initialization on top of loading the config from file
func (config *Config) Init() (err error) {
for _, monitor := range config.Monitors {
if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
monitor.AlertAfter = config.DefaultAlertAfter
}
if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
monitor.AlertEvery = config.DefaultAlertEvery
}
if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
monitor.AlertDown = config.DefaultAlertDown
}
if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
monitor.AlertUp = config.DefaultAlertUp
}
}
for name, alert := range config.Alerts {
alert.Name = name
if err = alert.BuildTemplates(); err != nil {
return
}
+1
View File
@@ -14,6 +14,7 @@ func TestLoadConfig(t *testing.T) {
pyCompat bool
}{
{"./test/valid-config.yml", false, "Valid config file", false},
{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
{"./test/does-not-exist", true, "Invalid config path", false},
+10 -6
View File
@@ -36,6 +36,8 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
alertNotice.MonitorName, alertNotice.IsUp,
)
return nil
}
for _, alertName := range alertNames {
@@ -66,10 +68,10 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
}
func checkMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
@@ -77,7 +79,11 @@ func checkMonitors(config *Config) error {
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
if alertNotice != nil {
return sendAlerts(config, monitor, alertNotice)
err := sendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, exit early and bubble it up
if err != nil {
return err
}
}
}
}
@@ -92,7 +98,7 @@ func main() {
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", 8080, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
flag.Parse()
// Print version if flag is provided
@@ -116,9 +122,7 @@ func main() {
// Start main loop
for {
err = checkMonitors(&config)
if err != nil {
panic(err)
}
slog.OnErrPanicf(err, "Error checking monitors")
time.Sleep(config.CheckInterval.Value())
}
+9 -9
View File
@@ -16,7 +16,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
},
@@ -28,7 +28,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertAfter: 1,
@@ -41,7 +41,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"ls"}},
alertCount: 1,
@@ -54,7 +54,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"unknown"},
@@ -68,7 +68,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
@@ -82,7 +82,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"good"},
@@ -90,7 +90,7 @@ func TestCheckMonitors(t *testing.T) {
},
},
Alerts: map[string]*Alert{
"good": &Alert{
"good": {
Command: CommandOrShell{Command: []string{"true"}},
},
},
@@ -101,7 +101,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
&Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"},
@@ -109,7 +109,7 @@ func TestCheckMonitors(t *testing.T) {
},
},
Alerts: map[string]*Alert{
"bad": &Alert{
"bad": {
Name: "bad",
Command: CommandOrShell{Command: []string{"false"}},
},
+7 -7
View File
@@ -12,7 +12,7 @@ import (
type Monitor struct { //nolint:maligned
// Config values
AlertAfter int16 `yaml:"alert_after"`
AlertEvery int16 `yaml:"alert_every"`
AlertEvery *int16 `yaml:"alert_every"`
CheckInterval SecondsOrDuration `yaml:"check_interval"`
Name string
AlertDown []string `yaml:"alert_down"`
@@ -129,16 +129,16 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
// Use alert cadence to determine if we should alert
switch {
case monitor.AlertEvery > 0:
// Handle integer number of failures before alerting
if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
case monitor.AlertEvery == 0:
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
// Handle alerting on first failure only
if failureCount == 0 {
notice = monitor.createAlertNotice(false)
}
case *monitor.AlertEvery > 0:
// Handle integer number of failures before alerting
if failureCount%*monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
default:
// Handle negative numbers indicating an exponential backoff
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
+26 -17
View File
@@ -141,17 +141,19 @@ func TestMonitorSuccess(t *testing.T) {
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
// hitting the threshold provided by AlertAfter
func TestMonitorFailureAlertAfter(t *testing.T) {
var alertEvery int16 = 1
cases := []struct {
monitor Monitor
expectNotice bool
name string
}{
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because and AlertEvery default to 0
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"},
}
for _, c := range cases {
@@ -172,6 +174,11 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
// TestMonitorFailureAlertEvery tests that alerts will trigger
// on the expected intervals
func TestMonitorFailureAlertEvery(t *testing.T) {
var alertEvery0, alertEvery1, alertEvery2 int16
alertEvery0 = 0
alertEvery1 = 1
alertEvery2 = 2
cases := []struct {
monitor Monitor
expectNotice bool
@@ -186,20 +193,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
*/
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to nil
// Alert first time only, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"},
// Alert every time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"},
// Alert every other time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"},
}
for _, c := range cases {
@@ -220,6 +227,8 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
// TestMonitorFailureExponential tests that alerts will trigger
// with an exponential backoff after repeated failures
func TestMonitorFailureExponential(t *testing.T) {
var alertEveryExp int16 = -1
cases := []struct {
expectNotice bool
name string
@@ -236,7 +245,7 @@ func TestMonitorFailureExponential(t *testing.T) {
// Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp}
for _, c := range cases {
log.Printf("Testing case %s", c.name)
+12
View File
@@ -0,0 +1,12 @@
---
check_interval: 1
default_alert_down: ["log_command"]
default_alert_after: 1
monitors:
- name: Command
command: ["echo", "$PATH"]
alerts:
log_command:
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]