Compare commits

..

14 Commits

Author SHA1 Message Date
Ian Fijolek
0b2078c6c5 uncomment test stuff 2021-01-08 18:09:27 -05:00
Ian Fijolek
0ed96f6c22 Remove prerelease 2021-01-08 18:06:56 -05:00
Ian Fijolek
8c12e80ad2 Try to stop building on pushes to non-master 2021-01-08 18:05:33 -05:00
Ian Fijolek
f6a9199f25 Change exec find 2021-01-08 18:01:58 -05:00
Ian Fijolek
3c2cae3011 Switch to ubuntu 2021-01-08 17:59:28 -05:00
Ian Fijolek
f9c082d30f More debugging 2021-01-08 17:56:30 -05:00
Ian Fijolek
5bb4da6178 More debugging 2021-01-08 17:53:19 -05:00
Ian Fijolek
d810bcb61c Try to fix compress step again 2021-01-08 17:44:52 -05:00
Ian Fijolek
e7353bb8df Try to fix compress step 2021-01-08 17:38:55 -05:00
Ian Fijolek
06ea8bea30 Add branch name to unstable release 2021-01-08 17:36:24 -05:00
Ian Fijolek
2e5ab23bd1 Add darwin releases 2021-01-08 17:33:27 -05:00
Ian Fijolek
aa741eb49e Make check step faster 2021-01-08 17:31:56 -05:00
Ian Fijolek
96c9b7d74c Add prereleases 2021-01-08 17:29:57 -05:00
Ian Fijolek
31336280e6 Add release uploads 2021-01-08 17:21:02 -05:00
22 changed files with 222 additions and 517 deletions
+2 -2
View File
@@ -4,7 +4,7 @@ name: test
steps:
- name: test
image: golang:1.17
image: golang:1.12
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
@@ -30,7 +30,7 @@ trigger:
steps:
- name: build all binaries
image: golang:1.17
image: golang:1.12
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
Vendored
-1
View File
@@ -17,5 +17,4 @@ config.yml
# Output binary
minitor
minitor-go
dist/
-36
View File
@@ -1,36 +0,0 @@
---
# Linter configuration (keys follow the golangci-lint config schema).
linters:
  # Linters switched on explicitly.
  enable:
    - errname
    - errorlint
    - exhaustive
    - gofumpt
    - goimports
    - gomnd
    - goprintffuncname
    - misspell
    - tagliatelle
    - tenv
    - testpackage
    - thelper
    - tparallel
    - unconvert
    - wrapcheck
    - wsl
  # Linters switched off explicitly.
  disable:
    - gochecknoglobals

linters-settings:
  gosec:
    # gosec rule IDs that are not reported.
    excludes:
      - G204
  tagliatelle:
    case:
      rules:
        # Struct tags for YAML are expected in snake_case.
        yaml: snake

issues:
  exclude-rules:
    # gosec findings are not reported for Go test files.
    - path: _test\.go
      linters:
        - gosec
+12 -8
View File
@@ -1,7 +1,7 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v2.4.0
hooks:
- id: check-added-large-files
- id: check-yaml
@@ -10,11 +10,15 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-merge-conflict
- repo: https://github.com/golangci/golangci-lint
rev: v1.50.1
- repo: git://github.com/dnephin/pre-commit-golang
rev: v0.3.5
hooks:
- id: golangci-lint
- repo: https://github.com/hadolint/hadolint
rev: v2.12.1-beta
hooks:
- id: hadolint
- id: go-fmt
- id: go-imports
# - id: gometalinter
# - id: golangci-lint
# - repo: https://github.com/IamTheFij/docker-pre-commit
# rev: v2.0.0
# hooks:
# - id: docker-compose-check
# - id: hadolint
+6 -2
View File
@@ -1,11 +1,15 @@
ARG REPO=library
FROM ${REPO}/alpine:3.12
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
FROM ${REPO}/alpine:3.10
# Copying all qemu files because amd64 doesn't exist and cannot conditionally copy
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
RUN mkdir /app
WORKDIR /app/
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+5 -3
View File
@@ -1,5 +1,7 @@
ARG REPO=library
FROM golang:1.17 AS builder
FROM golang:1.12-alpine AS builder
RUN apk add --no-cache git=~2
RUN mkdir /app
WORKDIR /app
@@ -14,7 +16,7 @@ ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM ${REPO}/alpine:3.12
FROM ${REPO}/alpine:3.10
RUN mkdir /app
WORKDIR /app/
@@ -22,7 +24,7 @@ WORKDIR /app/
COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+4 -17
View File
@@ -54,10 +54,7 @@ The global configurations are:
|key|value|
|---|---|
|`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|`check_interval`|Maximum frequency to run checks for each monitor|
|`monitors`|List of all monitors. Detailed description below|
|`alerts`|List of all alerts. Detailed description below|
@@ -114,7 +111,7 @@ minitor -metrics -metrics-port 3000
## Contributing
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
Whether you're looking to submit a patch or just tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
@@ -146,25 +143,15 @@ alerts:
command: 'echo {{.MonitorName}}'
```
Interval durations have changed from being an integer number of seconds to a duration string supported by Go, for example:
minitor-py:
```yaml
check_interval: 90
```
minitor-go:
```yaml
check_interval: 1m30s
```
For the time being, legacy configs for the Python version of Minitor should be compatible if you apply the `-py-compat` flag when running Minitor. Eventually, this flag will go away when later breaking changes are introduced.
## Future
Future, potentially breaking changes
- [ ] Implement leveled logging (maybe glog or logrus)
- [ ] Consider value of templating vs injecting values into Env variables
- [ ] Async checking
- [ ] Revisit metrics and see if they all make sense
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging (Breaking)
- [ ] Use durations rather than seconds checked in event loop (Potentially breaking)
+18 -48
View File
@@ -2,21 +2,12 @@ package main
import (
"bytes"
"errors"
"fmt"
"log"
"os/exec"
"strings"
"text/template"
"time"
"git.iamthefij.com/iamthefij/slog"
)
var (
errNoTemplate = errors.New("no template")
// ErrAlertFailed indicates that an alert failed to send
ErrAlertFailed = errors.New("alert failed")
)
// Alert is a config driven mechanism for sending a notice
@@ -29,12 +20,12 @@ type Alert struct {
// AlertNotice captures the context for an alert to be sent
type AlertNotice struct {
MonitorName string
AlertCount int16
FailureCount int16
IsUp bool
LastSuccess time.Time
MonitorName string
LastCheckOutput string
LastSuccess time.Time
IsUp bool
}
// IsValid returns a boolean indicating if the Alert has been correctly
@@ -54,33 +45,29 @@ func (alert *Alert) BuildTemplates() error {
"{last_success}", "{{.LastSuccess}}",
"{monitor_name}", "{{.MonitorName}}",
)
slog.Debugf("Building template for alert %s", alert.Name)
switch {
case alert.commandTemplate == nil && alert.Command.Command != nil:
if LogDebug {
log.Printf("DEBUG: Building template for alert %s", alert.Name)
}
if alert.commandTemplate == nil && alert.Command.Command != nil {
alert.commandTemplate = []*template.Template{}
for i, cmdPart := range alert.Command.Command {
if PyCompat {
cmdPart = legacy.Replace(cmdPart)
}
alert.commandTemplate = append(alert.commandTemplate, template.Must(
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
))
}
case alert.commandShellTemplate == nil && alert.Command.ShellCommand != "":
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" {
shellCmd := alert.Command.ShellCommand
if PyCompat {
shellCmd = legacy.Replace(shellCmd)
}
alert.commandShellTemplate = template.Must(
template.New(alert.Name).Parse(shellCmd),
)
default:
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
} else {
return fmt.Errorf("No template provided for alert %s", alert.Name)
}
return nil
@@ -88,40 +75,30 @@ func (alert *Alert) BuildTemplates() error {
// Send will send an alert notice by executing the command template
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
var cmd *exec.Cmd
switch {
case alert.commandTemplate != nil:
if alert.commandTemplate != nil {
command := []string{}
for _, cmdTmp := range alert.commandTemplate {
var commandBuffer bytes.Buffer
err = cmdTmp.Execute(&commandBuffer, notice)
if err != nil {
return
}
command = append(command, commandBuffer.String())
}
cmd = exec.Command(command[0], command[1:]...)
case alert.commandShellTemplate != nil:
} else if alert.commandShellTemplate != nil {
var commandBuffer bytes.Buffer
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
if err != nil {
return
}
shellCommand := commandBuffer.String()
cmd = ShellCommand(shellCommand)
default:
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
} else {
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
return
}
@@ -133,15 +110,8 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
var output []byte
output, err = cmd.CombinedOutput()
outputStr = string(output)
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
if err != nil {
err = fmt.Errorf(
"Alert '%s' failed to send. Returned %v: %w",
alert.Name,
err,
ErrAlertFailed,
)
if LogDebug {
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
}
return outputStr, err
+1 -16
View File
@@ -18,13 +18,11 @@ func TestAlertIsValid(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
actual := c.alert.IsValid()
if actual != c.expected {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -102,28 +100,19 @@ func TestAlertSend(t *testing.T) {
log.Printf("Testing case %s", c.name)
// Set PyCompat to value of compat flag
PyCompat = c.pyCompat
err := c.alert.BuildTemplates()
if err != nil {
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
}
c.alert.BuildTemplates()
output, err := c.alert.Send(c.notice)
hasErr := (err != nil)
if output != c.expectedOutput {
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
log.Printf("Case failed: %s", c.name)
}
if hasErr != c.expectErr {
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
log.Printf("Case failed: %s", c.name)
}
// Set PyCompat back to default value
PyCompat = false
log.Println("-----")
}
}
@@ -131,12 +120,10 @@ func TestAlertSend(t *testing.T) {
// TestAlertSendNoTemplates checks that Send reports an error when it is
// called on a zero-value Alert, i.e. one for which no templates were built.
func TestAlertSendNoTemplates(t *testing.T) {
	var (
		emptyAlert  Alert
		emptyNotice AlertNotice
	)

	output, sendErr := emptyAlert.Send(emptyNotice)
	if sendErr == nil {
		t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
	}

	log.Println("-----")
}
@@ -155,12 +142,10 @@ func TestAlertBuildTemplate(t *testing.T) {
log.Printf("Testing case %s", c.name)
err := c.alert.BuildTemplates()
hasErr := (err != nil)
if hasErr != c.expectErr {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
+20 -88
View File
@@ -3,23 +3,16 @@ package main
import (
"errors"
"io/ioutil"
"time"
"log"
"git.iamthefij.com/iamthefij/slog"
"gopkg.in/yaml.v2"
)
var errInvalidConfig = errors.New("Invalid configuration")
// Config type is contains all provided user configuration
type Config struct {
CheckInterval SecondsOrDuration `yaml:"check_interval"`
DefaultAlertAfter int16 `yaml:"default_alert_after"`
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
DefaultAlertDown []string `yaml:"default_alert_down"`
DefaultAlertUp []string `yaml:"default_alert_up"`
Monitors []*Monitor
Alerts map[string]*Alert
CheckInterval int64 `yaml:"check_interval"`
Monitors []*Monitor
Alerts map[string]*Alert
}
// CommandOrShell type wraps a string or list of strings
@@ -42,48 +35,17 @@ func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) erro
// Error indicates this is shell command
if err != nil {
var shellCmd string
err := unmarshal(&shellCmd)
if err != nil {
return err
}
cos.ShellCommand = shellCmd
} else {
cos.Command = cmd
}
return nil
}
// SecondsOrDuration wraps a duration value for parsing a duration or seconds from YAML
// NOTE: This should be removed in favor of only parsing durations once compatibility is broken
type SecondsOrDuration struct {
value time.Duration
}
// Value returns a duration value
func (sod SecondsOrDuration) Value() time.Duration {
return sod.value
}
// UnmarshalYAML allows unmarshalling a duration value or seconds if an int was provided
func (sod *SecondsOrDuration) UnmarshalYAML(unmarshal func(interface{}) error) error {
var seconds int64
err := unmarshal(&seconds)
if err == nil {
sod.value = time.Second * time.Duration(seconds)
return nil
}
// Error indicates that we don't have an int
err = unmarshal(&sod.value)
return err
}
// IsValid checks config validity and returns true if valid
func (config Config) IsValid() (isValid bool) {
isValid = true
@@ -91,75 +53,47 @@ func (config Config) IsValid() (isValid bool) {
// Validate alerts
if config.Alerts == nil || len(config.Alerts) == 0 {
// This should never happen because there is a default alert named 'log' for now
slog.Errorf("Invalid alert configuration: Must provide at least one alert")
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
isValid = false
}
for _, alert := range config.Alerts {
if !alert.IsValid() {
slog.Errorf("Invalid alert configuration: %+v", alert.Name)
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
isValid = false
} else {
slog.Debugf("Loaded alert %s", alert.Name)
}
}
// Validate monitors
if config.Monitors == nil || len(config.Monitors) == 0 {
slog.Errorf("Invalid monitor configuration: Must provide at least one monitor")
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
isValid = false
}
for _, monitor := range config.Monitors {
if !monitor.IsValid() {
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
isValid = false
}
// Check that all Monitor alerts actually exist
for _, isUp := range []bool{true, false} {
for _, alertName := range monitor.GetAlertNames(isUp) {
if _, ok := config.Alerts[alertName]; !ok {
slog.Errorf(
"Invalid monitor configuration: %s. Unknown alert %s",
log.Printf(
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
monitor.Name, alertName,
)
isValid = false
}
}
}
}
return isValid
return
}
// Init performs extra initialization on top of loading the config from file
func (config *Config) Init() (err error) {
for _, monitor := range config.Monitors {
if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
monitor.AlertAfter = config.DefaultAlertAfter
}
if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
monitor.AlertEvery = config.DefaultAlertEvery
}
if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
monitor.AlertDown = config.DefaultAlertDown
}
if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
monitor.AlertUp = config.DefaultAlertUp
}
}
for name, alert := range config.Alerts {
alert.Name = name
if err = alert.BuildTemplates(); err != nil {
return
}
@@ -180,30 +114,28 @@ func LoadConfig(filePath string) (config Config, err error) {
return
}
slog.Debugf("Config values:\n%v\n", config)
if LogDebug {
log.Printf("DEBUG: Config values:\n%v\n", config)
}
// Add log alert if not present
if PyCompat {
// Initialize alerts list if not present
// Intialize alerts list if not present
if config.Alerts == nil {
config.Alerts = map[string]*Alert{}
}
if _, ok := config.Alerts["log"]; !ok {
config.Alerts["log"] = NewLogAlert()
}
}
// Finish initializing configuration
if err = config.Init(); err != nil {
return
}
if !config.IsValid() {
err = errInvalidConfig
err = errors.New("Invalid configuration")
return
}
return config, err
// Finish initializing configuration
err = config.Init()
return
}
+1 -42
View File
@@ -3,7 +3,6 @@ package main
import (
"log"
"testing"
"time"
)
func TestLoadConfig(t *testing.T) {
@@ -14,7 +13,6 @@ func TestLoadConfig(t *testing.T) {
pyCompat bool
}{
{"./test/valid-config.yml", false, "Valid config file", false},
{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
{"./test/does-not-exist", true, "Invalid config path", false},
@@ -29,50 +27,20 @@ func TestLoadConfig(t *testing.T) {
PyCompat = c.pyCompat
_, err := LoadConfig(c.configPath)
hasErr := (err != nil)
if hasErr != c.expectErr {
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
}
// Set PyCompat to default value
PyCompat = false
log.Println("-----")
}
}
// TestIntervalParsing loads ./test/valid-config.yml and checks that the
// top-level check interval and the check intervals of the first two monitors
// are parsed into the expected durations.
func TestIntervalParsing(t *testing.T) {
	log.Printf("Testing case TestIntervalParsing")

	config, err := LoadConfig("./test/valid-config.yml")
	if err != nil {
		// Abort: without a loaded config the monitor lookups below cannot
		// be evaluated meaningfully.
		t.Fatalf("Failed loading config: %v", err)
	}

	// The per-monitor assertions index the first two monitors; fail cleanly
	// instead of panicking if the fixture does not provide them.
	if len(config.Monitors) < 2 {
		t.Fatalf("Expected at least 2 monitors in config. actual=%d", len(config.Monitors))
	}

	oneSecond := time.Second
	tenSeconds := 10 * time.Second
	oneMinute := time.Minute

	// validate top level interval seconds represented as an int
	if actual := config.CheckInterval.Value(); actual != oneSecond {
		t.Errorf("Incorrectly parsed int seconds. expected=%v actual=%v", oneSecond, actual)
	}

	// Report the values that were actually compared for each monitor.
	if actual := config.Monitors[0].CheckInterval.Value(); actual != tenSeconds {
		t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", tenSeconds, actual)
	}

	if actual := config.Monitors[1].CheckInterval.Value(); actual != oneMinute {
		t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneMinute, actual)
	}

	log.Println("-----")
}
// TestMultiLineConfig is a more complicated test stepping through the parsing
// and execution of multi-line strings presented in YAML
func TestMultiLineConfig(t *testing.T) {
log.Println("Testing multi-line string config")
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
if err != nil {
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
@@ -80,10 +48,8 @@ func TestMultiLineConfig(t *testing.T) {
log.Println("-----")
log.Println("TestMultiLineConfig(parse > string)")
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
actual := config.Monitors[0].Command.ShellCommand
if expected != actual {
t.Errorf("TestMultiLineConfig(>) failed")
t.Logf("string expected=`%v`", expected)
@@ -94,15 +60,12 @@ func TestMultiLineConfig(t *testing.T) {
log.Println("-----")
log.Println("TestMultiLineConfig(execute > string)")
_, notice := config.Monitors[0].Check()
if notice == nil {
t.Fatalf("Did not receive an alert notice")
}
expected = "Some string with stuff\n<angle brackets>\n"
actual = notice.LastCheckOutput
if expected != actual {
t.Errorf("TestMultiLineConfig(execute > string) check failed")
t.Logf("string expected=`%v`", expected)
@@ -113,10 +76,8 @@ func TestMultiLineConfig(t *testing.T) {
log.Println("-----")
log.Println("TestMultiLineConfig(parse | string)")
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
actual = config.Alerts["log_shell"].Command.ShellCommand
if expected != actual {
t.Errorf("TestMultiLineConfig(|) failed")
t.Logf("string expected=`%v`", expected)
@@ -127,12 +88,10 @@ func TestMultiLineConfig(t *testing.T) {
log.Println("-----")
log.Println("TestMultiLineConfig(execute | string)")
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
if err != nil {
t.Errorf("Execution of alert failed")
}
expected = "Some string with stuff\n<angle brackets>\n"
if expected != actual {
t.Errorf("TestMultiLineConfig(execute | string) check failed")
+1 -2
View File
@@ -1,9 +1,8 @@
module git.iamthefij.com/iamthefij/minitor-go
go 1.15
go 1.12
require (
git.iamthefij.com/iamthefij/slog v1.3.0
github.com/prometheus/client_golang v1.2.1
gopkg.in/yaml.v2 v2.2.4
)
-2
View File
@@ -1,5 +1,3 @@
git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
+58 -64
View File
@@ -1,15 +1,16 @@
package main
import (
"errors"
"flag"
"fmt"
"log"
"time"
"git.iamthefij.com/iamthefij/slog"
)
var (
// LogDebug will control whether debug messages should be logged
LogDebug = false
// ExportMetrics will track whether or not we want to export metrics to prometheus
ExportMetrics = false
// MetricsPort is the port to expose metrics on
@@ -22,67 +23,57 @@ var (
// version of minitor being run
version = "dev"
errUnknownAlert = errors.New("unknown alert")
)
// sendAlerts dispatches alertNotice to every alert that monitor names for the
// notice's up/down state. It returns nil when the monitor names no alerts or
// when every alert is sent, and stops at the first failure: either the error
// returned by Alert.Send, or an error wrapping errUnknownAlert when a name is
// missing from config.Alerts. Each successful send is counted in Metrics.
func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
	slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)

	names := monitor.GetAlertNames(alertNotice.IsUp)
	if names == nil {
		// This should only happen for a recovery alert. AlertDown is validated not empty
		slog.Warningf(
			"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
			alertNotice.MonitorName, alertNotice.IsUp,
		)

		return nil
	}

	for _, name := range names {
		target, known := config.Alerts[name]
		if !known {
			// This case should never actually happen since we validate against it
			slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, name)

			return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, name, errUnknownAlert)
		}

		output, sendErr := target.Send(*alertNotice)
		if sendErr != nil {
			slog.Errorf(
				"Alert '%s' failed. result=%v: output=%s",
				target.Name,
				sendErr,
				output,
			)

			return sendErr
		}

		// Count alert metrics
		Metrics.CountAlert(monitor.Name, target.Name)
	}

	return nil
}
func checkMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
Metrics.CountCheck(monitor.Name, success, hasAlert)
// Should probably consider refactoring everything below here
if alertNotice != nil {
err := sendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, exit early and bubble it up
if err != nil {
return err
if LogDebug {
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
}
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
if alertNames == nil {
// This should only happen for a recovery alert. AlertDown is validated not empty
log.Printf(
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
alertNotice.MonitorName, alertNotice.IsUp,
)
}
for _, alertName := range alertNames {
if alert, ok := config.Alerts[alertName]; ok {
output, err := alert.Send(*alertNotice)
if err != nil {
log.Printf(
"ERROR: Alert '%s' failed. result=%v: output=%s",
alert.Name,
err,
output,
)
return fmt.Errorf(
"Unsuccessfully triggered alert '%s'. "+
"Crashing to avoid false negatives: %v",
alert.Name,
err,
)
}
// Count alert metrics
Metrics.CountAlert(monitor.Name, alert.Name)
} else {
// This case should never actually happen since we validate against it
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
}
}
}
}
@@ -92,38 +83,41 @@ func checkMonitors(config *Config) error {
}
func main() {
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
// Get debug flag
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
flag.IntVar(&MetricsPort, "metrics-port", 8080, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
var configPath = flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
flag.Parse()
// Print version if flag is provided
if *showVersion {
fmt.Println("Minitor version:", version)
log.Println("Minitor version:", version)
return
}
// Load configuration
config, err := LoadConfig(*configPath)
slog.OnErrFatalf(err, "Error loading config: %v", err)
if err != nil {
log.Fatalf("Error loading config: %v", err)
}
// Serve metrics exporter, if specified
if ExportMetrics {
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
log.Println("INFO: Exporting metrics to Prometheus")
go ServeMetrics()
}
// Start main loop
for {
err = checkMonitors(&config)
slog.OnErrPanicf(err, "Error checking monitors")
if err != nil {
panic(err)
}
time.Sleep(config.CheckInterval.Value())
sleepTime := time.Duration(config.CheckInterval) * time.Second
time.Sleep(sleepTime)
}
}
+22 -44
View File
@@ -16,7 +16,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
{
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
},
@@ -28,24 +28,36 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertAfter: 1,
},
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"unknown"},
AlertAfter: 1,
},
},
},
expectErr: false,
name: "Monitor failure, no alerts",
name: "Monitor failure, no and unknown alerts",
},
{
config: Config{
Monitors: []*Monitor{
{
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"ls"}},
alertCount: 1,
},
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
alertCount: 1,
},
},
},
expectErr: false,
@@ -54,35 +66,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"unknown"},
AlertAfter: 1,
},
},
},
expectErr: true,
name: "Monitor failure, unknown alerts",
},
{
config: Config{
Monitors: []*Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
alertCount: 1,
},
},
},
expectErr: true,
name: "Monitor recovery, unknown alerts",
},
{
config: Config{
Monitors: []*Monitor{
{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"good"},
@@ -90,7 +74,7 @@ func TestCheckMonitors(t *testing.T) {
},
},
Alerts: map[string]*Alert{
"good": {
"good": &Alert{
Command: CommandOrShell{Command: []string{"true"}},
},
},
@@ -101,7 +85,7 @@ func TestCheckMonitors(t *testing.T) {
{
config: Config{
Monitors: []*Monitor{
{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"},
@@ -109,7 +93,7 @@ func TestCheckMonitors(t *testing.T) {
},
},
Alerts: map[string]*Alert{
"bad": {
"bad": &Alert{
Name: "bad",
Command: CommandOrShell{Command: []string{"false"}},
},
@@ -121,16 +105,10 @@ func TestCheckMonitors(t *testing.T) {
}
for _, c := range cases {
err := c.config.Init()
if err != nil {
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
}
err = checkMonitors(&c.config)
c.config.Init()
err := checkMonitors(&c.config)
if err == nil && c.expectErr {
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
} else if err != nil && !c.expectErr {
t.Errorf("checkMonitors(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
}
}
}
+1 -17
View File
@@ -19,7 +19,6 @@ import (
type MinitorMetrics struct {
alertCount *prometheus.CounterVec
checkCount *prometheus.CounterVec
checkTime *prometheus.GaugeVec
monitorStatus *prometheus.GaugeVec
}
@@ -41,13 +40,6 @@ func NewMetrics() *MinitorMetrics {
},
[]string{"monitor", "status", "is_alert"},
),
checkTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_check_milliseconds",
Help: "Time in miliseconds that a check ran for",
},
[]string{"monitor", "status"},
),
monitorStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_monitor_up_count",
@@ -60,7 +52,6 @@ func NewMetrics() *MinitorMetrics {
// Register newly created metrics
prometheus.MustRegister(metrics.alertCount)
prometheus.MustRegister(metrics.checkCount)
prometheus.MustRegister(metrics.checkTime)
prometheus.MustRegister(metrics.monitorStatus)
return metrics
@@ -72,12 +63,11 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
if isUp {
val = 1.0
}
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
}
// CountCheck counts the result of a particular Monitor check
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
status := "failure"
if isSuccess {
status = "success"
@@ -91,10 +81,6 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int
metrics.checkCount.With(
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
).Inc()
metrics.checkTime.With(
prometheus.Labels{"monitor": monitor, "status": status},
).Set(float64(ms))
}
// CountAlert counts an alert
@@ -110,8 +96,6 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
// ServeMetrics starts an http server with a Prometheus metrics handler.
// The handler is registered under /metrics and the server listens on the
// port held in MetricsPort. The call does not return while the server runs.
func ServeMetrics() {
// Register the Prometheus handler on the default serve mux.
http.Handle("/metrics", promhttp.Handler())
// Listen on all interfaces at the configured port.
host := fmt.Sprintf(":%d", MetricsPort)
// NOTE(review): the error returned by ListenAndServe is discarded, so a
// failure to bind the port is silent.
_ = http.ListenAndServe(host, nil)
}
+41 -50
View File
@@ -1,31 +1,28 @@
package main
import (
"log"
"math"
"os/exec"
"time"
"git.iamthefij.com/iamthefij/slog"
)
// Monitor represents a particular periodic check of a command
type Monitor struct { //nolint:maligned
type Monitor struct {
// Config values
AlertAfter int16 `yaml:"alert_after"`
AlertEvery *int16 `yaml:"alert_every"`
CheckInterval SecondsOrDuration `yaml:"check_interval"`
Name string
Command CommandOrShell
AlertDown []string `yaml:"alert_down"`
AlertUp []string `yaml:"alert_up"`
Command CommandOrShell
CheckInterval float64 `yaml:"check_interval"`
AlertAfter int16 `yaml:"alert_after"`
AlertEvery int16 `yaml:"alert_every"`
// Other values
alertCount int16
failureCount int16
lastCheck time.Time
lastSuccess time.Time
lastOutput string
lastCheckDuration time.Duration
lastCheck time.Time
lastOutput string
alertCount int16
failureCount int16
lastSuccess time.Time
}
// IsValid returns a boolean indicating if the Monitor has been correctly
@@ -43,9 +40,8 @@ func (monitor Monitor) ShouldCheck() bool {
return true
}
sinceLastCheck := time.Since(monitor.lastCheck)
return sinceLastCheck >= monitor.CheckInterval.Value()
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
return sinceLastCheck >= monitor.CheckInterval
}
// Check will run the command configured by the Monitor and return a status
@@ -58,14 +54,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
cmd = ShellCommand(monitor.Command.ShellCommand)
}
checkStartTime := time.Now()
output, err := cmd.CombinedOutput()
monitor.lastCheck = time.Now()
monitor.lastOutput = string(output)
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
var alertNotice *AlertNotice
isSuccess := (err == nil)
if isSuccess {
alertNotice = monitor.success()
@@ -73,11 +66,17 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
alertNotice = monitor.failure()
}
slog.Debugf("Command output: %s", monitor.lastOutput)
slog.OnErrWarnf(err, "Command result: %v", err)
if LogDebug {
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
}
if err != nil {
if LogDebug {
log.Printf("DEBUG: Command result: %v", err)
}
}
slog.Infof(
"%s success=%t, alert=%t",
log.Printf(
"INFO: %s success=%t, alert=%t",
monitor.Name,
isSuccess,
alertNotice != nil,
@@ -91,17 +90,11 @@ func (monitor Monitor) IsUp() bool {
return monitor.alertCount == 0
}
// LastCheckMilliseconds returns how long the most recent check ran for,
// expressed as a whole number of milliseconds.
func (monitor Monitor) LastCheckMilliseconds() int64 {
	elapsed := monitor.lastCheckDuration

	return elapsed.Milliseconds()
}
func (monitor *Monitor) success() (notice *AlertNotice) {
if !monitor.IsUp() {
// Alert that we have recovered
notice = monitor.createAlertNotice(true)
}
monitor.failureCount = 0
monitor.alertCount = 0
monitor.lastSuccess = time.Now()
@@ -113,14 +106,15 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
monitor.failureCount++
// If we haven't hit the minimum failures, we can exit
if monitor.failureCount < monitor.getAlertAfter() {
slog.Debugf(
"%s failed but did not hit minimum failures. "+
"Count: %v alert after: %v",
monitor.Name,
monitor.failureCount,
monitor.getAlertAfter(),
)
if LogDebug {
log.Printf(
"DEBUG: %s failed but did not hit minimum failures. "+
"Count: %v alert after: %v",
monitor.Name,
monitor.failureCount,
monitor.getAlertAfter(),
)
}
return
}
@@ -128,20 +122,19 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
failureCount := (monitor.failureCount - monitor.getAlertAfter())
// Use alert cadence to determine if we should alert
switch {
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
if monitor.AlertEvery > 0 {
// Handle integer number of failures before alerting
if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
} else if monitor.AlertEvery == 0 {
// Handle alerting on first failure only
if failureCount == 0 {
notice = monitor.createAlertNotice(false)
}
case *monitor.AlertEvery > 0:
// Handle integer number of failures before alerting
if failureCount%*monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
default:
} else {
// Handle negative numbers indicating an exponential backoff
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
notice = monitor.createAlertNotice(false)
}
}
@@ -151,7 +144,7 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
monitor.alertCount++
}
return notice
return
}
func (monitor Monitor) getAlertAfter() int16 {
@@ -160,7 +153,6 @@ func (monitor Monitor) getAlertAfter() int16 {
if monitor.AlertAfter == 0 {
return 1
}
return monitor.AlertAfter
}
@@ -169,7 +161,6 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
if up {
return monitor.AlertUp
}
return monitor.AlertDown
}
+20 -48
View File
@@ -22,13 +22,11 @@ func TestMonitorIsValid(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
actual := c.monitor.IsValid()
if actual != c.expected {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -45,9 +43,9 @@ func TestMonitorShouldCheck(t *testing.T) {
name string
}{
{Monitor{}, true, "Empty"},
{Monitor{lastCheck: timeNow, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, true, "-20s"},
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
}
for _, c := range cases {
@@ -73,13 +71,11 @@ func TestMonitorIsUp(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
actual := c.monitor.IsUp()
if actual != c.expected {
t.Errorf("IsUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -100,13 +96,11 @@ func TestMonitorGetAlertNames(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
actual := c.monitor.GetAlertNames(c.up)
if !EqualSliceString(actual, c.expected) {
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -125,15 +119,12 @@ func TestMonitorSuccess(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
notice := c.monitor.success()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -141,32 +132,27 @@ func TestMonitorSuccess(t *testing.T) {
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
// hitting the threshold provided by AlertAfter
func TestMonitorFailureAlertAfter(t *testing.T) {
var alertEvery int16 = 1
cases := []struct {
monitor Monitor
expectNotice bool
name string
}{
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
}
for _, c := range cases {
log.Printf("Testing case %s", c.name)
notice := c.monitor.failure()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -174,11 +160,6 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
// TestMonitorFailureAlertEvery tests that alerts will trigger
// on the expected intervals
func TestMonitorFailureAlertEvery(t *testing.T) {
var alertEvery0, alertEvery1, alertEvery2 int16
alertEvery0 = 0
alertEvery1 = 1
alertEvery2 = 2
cases := []struct {
monitor Monitor
expectNotice bool
@@ -193,20 +174,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
*/
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to nil
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
// Alert first time only, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
// Alert every time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
// Alert every other time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
}
for _, c := range cases {
@@ -214,12 +195,10 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
notice := c.monitor.failure()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -227,8 +206,6 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
// TestMonitorFailureExponential tests that alerts will trigger
// with an exponential backoff after repeated failures
func TestMonitorFailureExponential(t *testing.T) {
var alertEveryExp int16 = -1
cases := []struct {
expectNotice bool
name string
@@ -245,19 +222,16 @@ func TestMonitorFailureExponential(t *testing.T) {
// Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp}
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
for _, c := range cases {
log.Printf("Testing case %s", c.name)
notice := monitor.failure()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
@@ -269,7 +243,6 @@ func TestMonitorCheck(t *testing.T) {
hasNotice bool
lastOutput string
}
cases := []struct {
monitor Monitor
expect expected
@@ -317,7 +290,6 @@ func TestMonitorCheck(t *testing.T) {
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
+5 -6
View File
@@ -3,14 +3,14 @@ check_interval: 5
monitors:
- name: Fake Website
command: ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
alert_down: [log_down, mailgun_down, sms_down]
alert_up: [log_up, email_up]
check_interval: 10 # Must be at minimum the global `check_interval`
check_interval: 10 # Must be at minimum the global `check_interval`
alert_after: 3
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
- name: Real Website
command: ["curl", "-s", "-o", "/dev/null", "https://google.com"]
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
alert_down: [log_down, mailgun_down, sms_down]
alert_up: [log_up, email_up]
check_interval: 5
@@ -23,8 +23,7 @@ alerts:
log_up:
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
email_up:
command:
[sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
mailgun_down:
command: >
curl -s -X POST
-12
View File
@@ -1,12 +0,0 @@
---
check_interval: 1
default_alert_down: ["log_command"]
default_alert_after: 1
monitors:
- name: Command
command: ["echo", "$PATH"]
alerts:
log_command:
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
+4 -6
View File
@@ -3,23 +3,21 @@ check_interval: 1
monitors:
- name: Command
command: ["echo", "$PATH"]
alert_down: ["log_command", "log_shell"]
command: ['echo', '$PATH']
alert_down: ['log_command', 'log_shell']
alert_every: 0
check_interval: 10s
- name: Shell
command: >
echo 'Some string with stuff';
echo 'another line';
echo $PATH;
exit 1
alert_down: ["log_command", "log_shell"]
alert_down: ['log_command', 'log_shell']
alert_after: 5
alert_every: 0
check_interval: 1m
alerts:
log_command:
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
log_shell:
command: echo "Failure on {{.MonitorName}} User is $USER"
+1 -3
View File
@@ -8,7 +8,7 @@ import (
// ShellCommand builds (but does not start) an *exec.Cmd that runs the given
// string through `sh -c`. Leading and trailing whitespace is trimmed from the
// command text before it is handed to the shell.
func ShellCommand(command string) *exec.Cmd {
	script := strings.TrimSpace(command)

	return exec.Command("sh", "-c", script)
}
@@ -17,12 +17,10 @@ func EqualSliceString(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i, val := range a {
if val != b[i] {
return false
}
}
return true
}