Compare commits

..

2 Commits

Author SHA1 Message Date
Ian Fijolek
0a0f6fe7c9 Remove command_shell key from example yaml 2020-02-16 13:30:43 -08:00
Ian Fijolek
d4e2cb7b9f Switch to a single key for command and command shell
This makes the configuration more similar to Minitor-py and
docker-compose. If a string is passed, it will be executed in a shell.
If an array is passed, it will be executed as a command directly.

This breaks compatibility with previous versions of Minitor-go, but is
closer to compatibility with Minitor-py.
2020-02-16 13:25:11 -08:00
38 changed files with 1001 additions and 1475 deletions
+67 -53
View File
@@ -3,15 +3,31 @@ kind: pipeline
name: test
steps:
- name: test
image: golang:1.20
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
image: golang:1.12
commands:
- make build
- make test
- name: check
image: iamthefij/drone-pre-commit:personal
image: python:3
commands:
- pip install pre-commit==1.20.0
- make check
- name: notify
image: drillster/drone-email
settings:
host:
from_secret: SMTP_HOST
username:
from_secret: SMTP_USER
password:
from_secret: SMTP_PASS
from: drone@iamthefij.com
when:
status: [changed, failure]
---
kind: pipeline
@@ -30,71 +46,69 @@ trigger:
steps:
- name: build all binaries
image: golang:1.20
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
image: golang:1.12
commands:
- make all
- name: compress binaries for release
image: ubuntu
commands:
- find ./dist -type f -executable -execdir tar -czvf {}.tar.gz {} \;
when:
event: tag
- name: upload gitea release
image: plugins/gitea-release
settings:
title: ${DRONE_TAG}
files: dist/*.tar.gz
checksum:
- md5
- sha1
- sha256
- sha512
base_url:
from_secret: gitea_base_url
api_key:
from_secret: gitea_token
when:
event: tag
- name: Build and publish docker images
image: thegeeklab/drone-docker-buildx
- name: push image - arm
image: plugins/docker
settings:
repo: iamthefij/minitor-go
auto_tag: true
platforms:
- linux/amd64
- linux/arm64
- linux/arm
auto_tag_suffix: linux-arm
username:
from_secret: docker_username
password:
from_secret: docker_password
build_args:
- ARCH=arm
- REPO=arm32v7
- name: push image - arm64
image: plugins/docker
settings:
repo: iamthefij/minitor-go
auto_tag: true
auto_tag_suffix: linux-arm64
username:
from_secret: docker_username
password:
from_secret: docker_password
build_args:
- ARCH=arm64
- REPO=arm64v8
- name: push image - amd64
image: plugins/docker
settings:
repo: iamthefij/minitor-go
auto_tag: true
auto_tag_suffix: linux-amd64
username:
from_secret: docker_username
password:
from_secret: docker_password
---
kind: pipeline
name: notify
depends_on:
- test
- publish
trigger:
status:
- failure
steps:
- name: publish manifest
image: plugins/manifest
settings:
spec: manifest.tmpl
auto_tag: true
ignore_missing: true
username:
from_secret: docker_username
password:
from_secret: docker_password
- name: notify
image: drillster/drone-email
settings:
host:
from_secret: SMTP_HOST # pragma: whitelist secret
from_secret: SMTP_HOST
username:
from_secret: SMTP_USER # pragma: whitelist secret
from_secret: SMTP_USER
password:
from_secret: SMTP_PASS # pragma: whitelist secret
from_secret: SMTP_PASS
from: drone@iamthefij.com
when:
status: [changed, failure]
Vendored
+2 -2
View File
@@ -17,5 +17,5 @@ config.yml
# Output binary
minitor
minitor-go
dist/
minitor-linux-*
minitor-darwin-amd64
-36
View File
@@ -1,36 +0,0 @@
---
linters:
enable:
- errname
- errorlint
- exhaustive
- gofumpt
- goimports
- gomnd
- goprintffuncname
- misspell
- tagliatelle
- tenv
- testpackage
- thelper
- tparallel
- unconvert
- wrapcheck
- wsl
disable:
- gochecknoglobals
linters-settings:
gosec:
excludes:
- G204
tagliatelle:
case:
rules:
yaml: snake
issues:
exclude-rules:
- path: _test\.go
linters:
- gosec
+7 -8
View File
@@ -1,7 +1,7 @@
---
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.4.0
rev: v2.4.0
hooks:
- id: check-added-large-files
- id: check-yaml
@@ -10,11 +10,10 @@ repos:
- id: trailing-whitespace
- id: end-of-file-fixer
- id: check-merge-conflict
- repo: https://github.com/golangci/golangci-lint
rev: v1.52.2
- repo: git://github.com/dnephin/pre-commit-golang
rev: v0.3.5
hooks:
- id: golangci-lint
- repo: https://github.com/hadolint/hadolint
rev: v2.12.1-beta
hooks:
- id: hadolint
- id: go-fmt
- id: go-imports
# - id: gometalinter
# - id: golangci-lint
+9 -5
View File
@@ -1,10 +1,15 @@
FROM alpine:3.18
ARG REPO=library
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
FROM ${REPO}/alpine:3.10
# Copying all qemu files because amd64 doesn't exist and cannot conditionally copy
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
RUN mkdir /app
WORKDIR /app/
# Add common checking tools
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2024a
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
@@ -14,9 +19,8 @@ COPY ./scripts /app/scripts
RUN chmod -R 755 /app/scripts
# Copy minitor in
ARG TARGETOS
ARG TARGETARCH
COPY ./dist/minitor-${TARGETOS}-${TARGETARCH} ./minitor
ARG ARCH=amd64
COPY ./minitor-linux-${ARCH} ./minitor
# Drop to non-root user
USER minitor
+9 -6
View File
@@ -1,5 +1,9 @@
FROM golang:1.20 AS builder
ARG REPO=library
FROM golang:1.12-alpine AS builder
RUN apk add --no-cache git=~2
RUN mkdir /app
WORKDIR /app
COPY ./go.mod ./go.sum /app/
@@ -7,13 +11,12 @@ RUN go mod download
COPY ./*.go /app/
ARG TARGETOS
ARG TARGETARCH
ARG ARCH=amd64
ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=${TARGETARCH}
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM alpine:3.18
FROM ${REPO}/alpine:3.10
RUN mkdir /app
WORKDIR /app/
@@ -21,7 +24,7 @@ WORKDIR /app/
COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2024a
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+44 -38
View File
@@ -1,43 +1,36 @@
DOCKER_TAG ?= minitor-go-${USER}
VERSION ?= $(shell git describe --tags --dirty)
GOFILES = *.go go.mod go.sum
# Multi-arch targets are generated from this
TARGET_ALIAS = minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 minitor-darwin-amd64
TARGETS = $(addprefix dist/,$(TARGET_ALIAS))
#
# Default make target will run tests
.DEFAULT_GOAL = test
GIT_TAG_NAME := $(shell git tag -l --contains HEAD)
GIT_SHA := $(shell git rev-parse HEAD)
VERSION := $(if $(GIT_TAG_NAME),$(GIT_TAG_NAME),$(GIT_SHA))
# Build all static Minitor binaries
.PHONY: all
all: $(TARGETS)
all: minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64
# Build all static Linux Minitor binaries. Used in Docker images
.PHONY: all-linux
all-linux: $(filter dist/minitor-linux-%,$(TARGETS))
# Build minitor for the current machine
minitor: $(GOFILES)
@echo Version: $(VERSION)
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
.PHONY: default
default: test
.PHONY: build
build: minitor
# Run minitor for the current machine
minitor:
@echo Version: $(VERSION)
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
.PHONY: run
run: minitor
run: minitor build
./minitor -debug
.PHONY: run-metrics
run-metrics: minitor
run-metrics: minitor build
./minitor -debug -metrics
# Run all tests
.PHONY: test
test:
go test -coverprofile=coverage.out
@echo
go tool cover -func=coverage.out
@echo
@# Check min coverage percentage
@go tool cover -func=coverage.out | awk -v target=80.0% \
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
@@ -46,7 +39,7 @@ test:
install-hooks:
pre-commit install --install-hooks
# Runs pre-commit checks on files
# Checks files for encryption
.PHONY: check
check:
pre-commit run --all-files
@@ -54,8 +47,9 @@ check:
.PHONY: clean
clean:
rm -f ./minitor
rm -f ./minitor-linux-*
rm -f ./minitor-darwin-amd64
rm -f ./coverage.out
rm -fr ./dist
.PHONY: docker-build
docker-build:
@@ -63,27 +57,39 @@ docker-build:
.PHONY: docker-run
docker-run: docker-build
docker run --rm -v $(shell pwd)/sample-config.hcl:/root/config.hcl $(DOCKER_TAG)
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG)
## Multi-arch targets
$(TARGETS): $(GOFILES)
mkdir -p ./dist
GOOS=$(word 2, $(subst -, ,$(@))) GOARCH=$(word 3, $(subst -, ,$(@))) CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o $@
.PHONY: $(TARGET_ALIAS)
$(TARGET_ALIAS):
$(MAKE) $(addprefix dist/,$@)
# Arch specific go build targets
minitor-darwin-amd64:
GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-darwin-amd64
minitor-linux-amd64:
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-linux-amd64
minitor-linux-arm:
GOOS=linux GOARCH=arm CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-linux-arm
minitor-linux-arm64:
GOOS=linux GOARCH=arm64 CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-linux-arm64
# Arch specific docker build targets
.PHONY: docker-build-arm
docker-build-arm: dist/minitor-linux-arm
docker build --platform linux/arm . -t ${DOCKER_TAG}-linux-arm
docker-build-arm: minitor-linux-arm
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
.PHONY: docker-build-arm64
docker-build-arm64: dist/minitor-linux-arm64
docker build --platform linux/arm64 . -t ${DOCKER_TAG}-linux-arm64
.PHONY: docker-build-arm
docker-build-arm64: minitor-linux-arm64
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64
# Cross run on host architecture
.PHONY: docker-run-arm
+74 -179
View File
@@ -1,184 +1,79 @@
# [minitor-go](https://git.iamthefij.com/iamthefij/minitor-go)
# minitor-go
A minimal monitoring system
## What does it do?
Minitor accepts an HCL configuration file with a set of commands to run and a set of alerts to execute when those commands fail. Minitor has a narrow feature set and instead follows a principle to outsource to other command line tools when possible. Thus, it relies on other command line tools to do checks and issue alerts. To make getting started a bit easier, Minitor includes a few scripts to help with common tasks.
## But why?
I'm running a few small services and found Sensu, Consul, Nagios, etc. to all be far too complicated for my use case.
## So how do I use it?
### Running
Install and execute with:
```bash
go install github.com/iamthefij/minitor-go@latest
minitor
```
If locally developing you can use:
```bash
make run
```
It will read the contents of `sample-config.hcl` and begin its loop. You could also run it directly and provide a new config file via the `-config` argument.
#### Docker
You can pull this repository directly from Docker:
```bash
docker pull iamthefij/minitor-go:latest
```
The Docker image uses a default `config.hcl` copied from `sample-config.hcl`. This won't really do anything for you, so when you run the Docker image, you should supply your own `config.hcl` file:
```bash
docker run -v $PWD/sample-config.hcl:/app/config.hcl iamthefij/minitor-go:latest
```
Images are provided for `amd64`, `arm`, and `arm64` architectures.
You can configure the timezone for the container by passing a `TZ` env variable. Eg. `TZ=America/Los_Angeles`.
## Configuring
In this repo, you can explore the `sample-config.hcl` file for an example, but the general structure is as follows. It should be noted that environment variable interpolation happens on load of the HCL file.
The global configurations are:
|key|value|
|---|---|
|`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|`default_alert_every`|A default value used as an `alert_every` value for a monitor if not specified.|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|`monitor`|block listing monitors. Detailed description below|
|`alert`|List of all alerts. Detailed description below|
### Monitors
Represent your monitors as blocks with a label indicating the name of the monitor.
```hcl
monitor "example" {
command = ["echo", "Hello, World!"]
alert_down = ["log"]
alert_up = ["log"]
check_interval = "1m"
alert_after = 1
alert_every = 0
}
```
Each monitor allows the following configuration:
|key|value|
|---|---|
|`name`|Name of the monitor running. This will show up in messages and logs.|
|`command`|A list of strings representing a command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `shell_command`|
|`shell_command`|A single string that represents a shell command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `command`|
|`alert_down`|A list of Alerts to be triggered when the monitor is in a "down" state|
|`alert_up`|A list of Alerts to be triggered when the monitor moves to an "up" state|
|`check_interval`|The interval at which this monitor should be checked. This must be greater than the global `check_interval` value|
|`alert_after`|Allows specifying the number of failed checks before an alert should be triggered. A value of 1 will start sending alerts after the first failure.|
|`alert_every`|Allows specifying how often an alert should be retriggered. There are a few magic numbers here. Defaults to `-1` for an exponential backoff. Setting to `0` disables re-alerting. Positive values will allow retriggering after the specified number of checks|
### Alerts
Represent your alerts as blocks with a label indicating the name of the alert. The name will be used in your monitor setup in `alert_down` and `alert_up`.
```hcl
monitor "example" {
command = ["false"]
alert_down = ["log"]
}
alert "log" {
shell_command = "echo '{{.MonitorName}} is down!'"
}
```
Each alert allows the following configuration:
|key|value|
|---|---|
|`command`|Specifies the command that should be executed in exec form. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `shell_command`|
|`shell_command`|Specifies a shell command as a single string. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `command`|
Also, when alerts are executed, they will be passed through Go's format function with arguments for some attributes of the Monitor. The following monitor specific variables can be referenced using Go formatting syntax:
|token|value|
|---|---|
|`{{.AlertCount}}`|Number of times this monitor has alerted|
|`{{.FailureCount}}`|The total number of sequential failed checks for this monitor|
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|`{{.LastSuccess}}`|The datetime of the last successful check as a go Time struct|
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
To provide flexible formatting, the following non-standard functions are available in templates:
|func|description|
|---|---|
|`ANSIC <Time>`|Formats provided time in ANSIC format|
|`UnixDate <Time>`|Formats provided time in UnixDate format|
|`RubyDate <Time>`|Formats provided time in RubyDate format|
|`RFC822Z <Time>`|Formats provided time in RFC822Z format|
|`RFC850 <Time>`|Formats provided time in RFC850 format|
|`RFC1123 <Time>`|Formats provided time in RFC1123 format|
|`RFC1123Z <Time>`|Formats provided time in RFC1123Z format|
|`RFC3339 <Time>`|Formats provided time in RFC3339 format|
|`RFC3339Nano <Time>`|Formats provided time in RFC3339Nano format|
|`FormatTime <Time> <string template>`|Formats provided time according to provided template|
|`InTZ <Time> <string timezone name>`|Converts provided time to parsed timezone from the provided name|
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
#### Running alerts on startup
It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma separated list of strings and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This can be used to allow you time to correct when initially launching, and to allow schedulers to more easily detect a failed deployment of Minitor.
Eg.
```bash
minitor -startup-alerts=log_down,log_up -config ./config.hcl
```
### Metrics
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
It is also possible to use the metrics endpoint for monitoring Minitor itself! This allows setting up multiple instances of Minitor on different servers and have them monitor each other so that you can detect a Minitor outage.
To run minitor with metrics, use the `-metrics` flag. The metrics will be served on port `8080` by default, though it can be overridden using `-metrics-port`. They will be accessible on the path `/metrics`. Eg. `localhost:8080/metrics`.
```bash
minitor -metrics
# or
minitor -metrics -metrics-port 3000
```
## Contributing
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
Github Mirror: https://github.com/IamTheFij/minitor.git
## Original Minitor
This is a reimplementation of [Minitor](https://git.iamthefij.com/iamthefij/minitor) in Go
A reimplementation of [Minitor](https://git.iamthefij.com/iamthefij/minitor) in Go
Minitor is already a minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a large footprint. Thus Go feels like a better fit for the project, longer term.
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
## Differences from Python version
Templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct, `AlertNotice` defined in `alert.go` and the built in Go templating format. Eg.
minitor-py:
```yaml
alerts:
log_command:
command: ['echo', '{monitor_name}']
log_shell:
command: 'echo {monitor_name}'
```
minitor-go:
```yaml
alerts:
log_command:
command: ['echo', '{{.MonitorName}}']
log_shell:
command: 'echo {{.MonitorName}}'
```
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used and continuations should not.
minitor-py:
```yaml
alerts:
log_shell:
command: >
echo "line 1"
echo "line 2"
echo "continued" \
"line"
```
minitor-go:
```yaml
alerts:
log_shell:
command: >
echo "line 1";
echo "line 2";
echo "continued"
"line"
```
## To do
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
Parity:
- [x] Run monitor commands
- [x] Run monitor commands in a shell
- [x] Run alert commands
- [x] Run alert commands in a shell
- [x] Allow templating of alert commands
- [x] Implement Prometheus client to export metrics
- [x] Test coverage
- [x] Integration testing (manual or otherwise)
- [x] Allow commands and shell commands in the same config key
Improvement (potentially breaking):
- [ ] Implement leveled logging (maybe glog or logrus)
- [ ] Consider switching from YAML to TOML
- [ ] Consider value of templating vs injecting values into Env variables
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging
- [ ] Async checking
- [ ] Use durations rather than seconds checked in event loop
- [ ] Revisit metrics and see if they all make sense
+27 -90
View File
@@ -2,142 +2,86 @@ package main
import (
"bytes"
"errors"
"fmt"
"log"
"os/exec"
"text/template"
"time"
"git.iamthefij.com/iamthefij/slog"
)
var (
errNoTemplate = errors.New("no template")
// ErrAlertFailed indicates that an alert failed to send
ErrAlertFailed = errors.New("alert failed")
)
// Alert is a config driven mechanism for sending a notice
type Alert struct {
Name string `hcl:"name,label"`
Command []string `hcl:"command,optional"`
ShellCommand string `hcl:"shell_command,optional"`
Name string
Command CommandOrShell
commandTemplate []*template.Template
commandShellTemplate *template.Template
}
// AlertNotice captures the context for an alert to be sent
type AlertNotice struct {
AlertCount int
FailureCount int
IsUp bool
LastSuccess time.Time
MonitorName string
AlertCount int16
FailureCount int16
LastCheckOutput string
LastSuccess time.Time
IsUp bool
}
// IsValid returns a boolean indicating if the Alert has been correctly
// configured
func (alert Alert) IsValid() bool {
hasAtLeastOneCommand := alert.Command != nil || alert.ShellCommand != ""
hasAtMostOneCommand := alert.Command == nil || alert.ShellCommand == ""
return hasAtLeastOneCommand && hasAtMostOneCommand
return !alert.Command.Empty()
}
// BuildTemplates compiles command templates for the Alert
func (alert *Alert) BuildTemplates() error {
slog.Debugf("Building template for alert %s", alert.Name)
// Time format func factory
tff := func(formatString string) func(time.Time) string {
return func(t time.Time) string {
return t.Format(formatString)
if LogDebug {
log.Printf("DEBUG: Building template for alert %s", alert.Name)
}
}
// Create some functions for formatting datetimes in popular formats
timeFormatFuncs := template.FuncMap{
"ANSIC": tff(time.ANSIC),
"UnixDate": tff(time.UnixDate),
"RubyDate": tff(time.RubyDate),
"RFC822Z": tff(time.RFC822Z),
"RFC850": tff(time.RFC850),
"RFC1123": tff(time.RFC1123),
"RFC1123Z": tff(time.RFC1123Z),
"RFC3339": tff(time.RFC3339),
"RFC3339Nano": tff(time.RFC3339Nano),
"FormatTime": func(t time.Time, timeFormat string) string {
return t.Format(timeFormat)
},
"InTZ": func(t time.Time, tzName string) (time.Time, error) {
tz, err := time.LoadLocation(tzName)
if err != nil {
return t, fmt.Errorf("failed to convert time to specified tz: %w", err)
}
return t.In(tz), nil
},
}
switch {
case alert.commandTemplate == nil && alert.Command != nil:
if alert.commandTemplate == nil && alert.Command.Command != nil {
alert.commandTemplate = []*template.Template{}
for i, cmdPart := range alert.Command {
for i, cmdPart := range alert.Command.Command {
alert.commandTemplate = append(alert.commandTemplate, template.Must(
template.New(alert.Name+fmt.Sprint(i)).Funcs(timeFormatFuncs).Parse(cmdPart),
template.New(alert.Name+string(i)).Parse(cmdPart),
))
}
case alert.commandShellTemplate == nil && alert.ShellCommand != "":
shellCmd := alert.ShellCommand
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" {
alert.commandShellTemplate = template.Must(
template.New(alert.Name).Funcs(timeFormatFuncs).Parse(shellCmd),
template.New(alert.Name).Parse(alert.Command.ShellCommand),
)
default:
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
} else {
return fmt.Errorf("No template provided for alert %s", alert.Name)
}
return nil
}
// Send will send an alert notice by executing the command template
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
func (alert Alert) Send(notice AlertNotice) (output_str string, err error) {
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
var cmd *exec.Cmd
switch {
case alert.commandTemplate != nil:
if alert.commandTemplate != nil {
command := []string{}
for _, cmdTmp := range alert.commandTemplate {
var commandBuffer bytes.Buffer
err = cmdTmp.Execute(&commandBuffer, notice)
if err != nil {
return
}
command = append(command, commandBuffer.String())
}
cmd = exec.Command(command[0], command[1:]...)
case alert.commandShellTemplate != nil:
} else if alert.commandShellTemplate != nil {
var commandBuffer bytes.Buffer
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
if err != nil {
return
}
shellCommand := commandBuffer.String()
cmd = ShellCommand(shellCommand)
default:
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
} else {
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
return
}
@@ -148,17 +92,10 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
var output []byte
output, err = cmd.CombinedOutput()
outputStr = string(output)
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
if err != nil {
err = fmt.Errorf(
"Alert %s failed to send. Returned %w: %w",
alert.Name,
err,
ErrAlertFailed,
)
output_str = string(output)
if LogDebug {
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, output_str)
}
return outputStr, err
return output_str, err
}
+35 -52
View File
@@ -1,68 +1,64 @@
package main_test
package main
import (
"log"
"testing"
m "git.iamthefij.com/iamthefij/minitor-go"
)
func TestAlertIsValid(t *testing.T) {
cases := []struct {
alert m.Alert
alert Alert
expected bool
name string
}{
{m.Alert{Command: []string{"echo", "test"}}, true, "Command only"},
{m.Alert{ShellCommand: "echo test"}, true, "CommandShell only"},
{m.Alert{}, false, "No commands"},
{Alert{Command: CommandOrShell{Command: []string{"echo", "test"}}}, true, "Command only"},
{Alert{Command: CommandOrShell{ShellCommand: "echo test"}}, true, "CommandShell only"},
{Alert{}, false, "No commands"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
log.Printf("Testing case %s", c.name)
actual := c.alert.IsValid()
if actual != c.expected {
t.Errorf("expected=%t actual=%t", c.expected, actual)
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
func TestAlertSend(t *testing.T) {
cases := []struct {
alert m.Alert
notice m.AlertNotice
alert Alert
notice AlertNotice
expectedOutput string
expectErr bool
name string
}{
{
m.Alert{Command: []string{"echo", "{{.MonitorName}}"}},
m.AlertNotice{MonitorName: "test"},
Alert{Command: CommandOrShell{Command: []string{"echo", "{{.MonitorName}}"}}},
AlertNotice{MonitorName: "test"},
"test\n",
false,
"Command with template",
},
{
m.Alert{ShellCommand: "echo {{.MonitorName}}"},
m.AlertNotice{MonitorName: "test"},
Alert{Command: CommandOrShell{ShellCommand: "echo {{.MonitorName}}"}},
AlertNotice{MonitorName: "test"},
"test\n",
false,
"Command shell with template",
},
{
m.Alert{Command: []string{"echo", "{{.Bad}}"}},
m.AlertNotice{MonitorName: "test"},
Alert{Command: CommandOrShell{Command: []string{"echo", "{{.Bad}}"}}},
AlertNotice{MonitorName: "test"},
"",
true,
"Command with bad template",
},
{
m.Alert{ShellCommand: "echo {{.Bad}}"},
m.AlertNotice{MonitorName: "test"},
Alert{Command: CommandOrShell{ShellCommand: "echo {{.Bad}}"}},
AlertNotice{MonitorName: "test"},
"",
true,
"Command shell with bad template",
@@ -70,64 +66,51 @@ func TestAlertSend(t *testing.T) {
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.alert.BuildTemplates()
if err != nil {
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
}
log.Printf("Testing case %s", c.name)
c.alert.BuildTemplates()
output, err := c.alert.Send(c.notice)
hasErr := (err != nil)
if output != c.expectedOutput {
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
log.Printf("Case failed: %s", c.name)
}
if hasErr != c.expectErr {
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
func TestAlertSendNoTemplates(t *testing.T) {
alert := m.Alert{}
notice := m.AlertNotice{}
alert := Alert{}
notice := AlertNotice{}
output, err := alert.Send(notice)
if err == nil {
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
}
log.Println("-----")
}
func TestAlertBuildTemplate(t *testing.T) {
cases := []struct {
alert m.Alert
alert Alert
expectErr bool
name string
}{
{m.Alert{Command: []string{"echo", "test"}}, false, "Command only"},
{m.Alert{ShellCommand: "echo test"}, false, "CommandShell only"},
{m.Alert{}, true, "No commands"},
{Alert{Command: CommandOrShell{Command: []string{"echo", "test"}}}, false, "Command only"},
{Alert{Command: CommandOrShell{ShellCommand: "echo test"}}, false, "CommandShell only"},
{Alert{}, true, "No commands"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
log.Printf("Testing case %s", c.name)
err := c.alert.BuildTemplates()
hasErr := (err != nil)
if hasErr != c.expectErr {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
+90 -133
View File
@@ -2,50 +2,97 @@ package main
import (
"errors"
"fmt"
"time"
"io/ioutil"
"log"
"git.iamthefij.com/iamthefij/slog"
/*
* "github.com/hashicorp/hcl/v2"
* "github.com/hashicorp/hcl/v2/gohcl"
*/
"github.com/hashicorp/hcl/v2/hclsimple"
"gopkg.in/yaml.v2"
)
var errInvalidConfig = errors.New("Invalid configuration")
// Config type contains all provided user configuration
type Config struct {
CheckIntervalStr string `hcl:"check_interval"`
CheckInterval time.Duration
DefaultAlertAfter *int `hcl:"default_alert_after,optional"`
DefaultAlertEvery *int `hcl:"default_alert_every,optional"`
DefaultAlertDown []string `hcl:"default_alert_down,optional"`
DefaultAlertUp []string `hcl:"default_alert_up,optional"`
Monitors []*Monitor `hcl:"monitor,block"`
Alerts []*Alert `hcl:"alert,block"`
alertLookup map[string]*Alert
CheckInterval int64 `yaml:"check_interval"`
Monitors []*Monitor
Alerts map[string]*Alert
}
func (c Config) GetAlert(name string) (*Alert, bool) {
if c.alertLookup == nil {
c.alertLookup = map[string]*Alert{}
for _, alert := range c.Alerts {
c.alertLookup[alert.Name] = alert
// CommandOrShell wraps a single configuration value that is either a list of
// strings, to be executed directly as a command, or a single string, to be
// executed in a shell. Each call to UnmarshalYAML sets exactly one of the two
// fields.
type CommandOrShell struct {
	// ShellCommand is set when the configured value is a plain string.
	ShellCommand string
	// Command is set when the configured value is a list of strings.
	Command []string
}

// Empty reports whether neither a shell command nor a direct command has been
// provided.
//
// A non-nil but zero-length Command (for example the result of unmarshalling
// an empty YAML list) is treated as empty as well, because it names no
// executable to run.
func (cos CommandOrShell) Empty() bool {
	return cos.ShellCommand == "" && len(cos.Command) == 0
}

// UnmarshalYAML allows unmarshalling either a string or slice of strings
// and parsing them as either a command or a shell command.
//
// The value is first decoded as a list of strings and stored in Command. If
// that fails, it is decoded as a single string and stored in ShellCommand.
// Only the error from the second attempt is returned; the error from the
// first attempt is deliberately discarded because it merely signals that the
// value is not a list.
func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// Try the list form first.
	var cmd []string
	if err := unmarshal(&cmd); err == nil {
		cos.Command = cmd
		return nil
	}

	// Not a list of strings: fall back to a single shell command string.
	var shellCmd string
	if err := unmarshal(&shellCmd); err != nil {
		return err
	}

	cos.ShellCommand = shellCmd

	return nil
}
// IsValid checks config validity and returns true if valid
func (config Config) IsValid() (isValid bool) {
isValid = true
// Validate monitors
if config.Monitors == nil || len(config.Monitors) == 0 {
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
isValid = false
}
for _, monitor := range config.Monitors {
if !monitor.IsValid() {
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
isValid = false
}
// Check that all Monitor alerts actually exist
for _, isUp := range []bool{true, false} {
for _, alertName := range monitor.GetAlertNames(isUp) {
if _, ok := config.Alerts[alertName]; !ok {
log.Printf(
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
monitor.Name, alertName,
)
isValid = false
}
}
}
}
v, ok := c.alertLookup[name]
// Validate alerts
if config.Alerts == nil || len(config.Alerts) == 0 {
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
isValid = false
}
for _, alert := range config.Alerts {
if !alert.IsValid() {
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
isValid = false
}
}
return v, ok
return
}
// BuildAllTemplates builds all alert templates
func (c *Config) BuildAllTemplates() (err error) {
for _, alert := range c.Alerts {
// Init performs extra initialization on top of loading the config from file
func (config *Config) Init() (err error) {
for name, alert := range config.Alerts {
alert.Name = name
if err = alert.BuildTemplates(); err != nil {
return
}
@@ -54,119 +101,29 @@ func (c *Config) BuildAllTemplates() (err error) {
return
}
// IsValid reports whether the configuration is usable. Every problem found is
// logged as an error; validation does not stop at the first failure, so a
// single call reports all of them.
func (config Config) IsValid() (isValid bool) {
	isValid = true

	// Alerts: at least one must be defined, and each must be valid on its own.
	// This should never happen because there is a default alert named 'log' for now
	if len(config.Alerts) == 0 {
		slog.Errorf("Invalid alert configuration: Must provide at least one alert")

		isValid = false
	}

	for _, a := range config.Alerts {
		if a.IsValid() {
			continue
		}

		slog.Errorf("Invalid alert configuration: %+v", a.Name)

		isValid = false
	}

	// Monitors: at least one must be defined, and each must be valid on its own.
	if len(config.Monitors) == 0 {
		slog.Errorf("Invalid monitor configuration: Must provide at least one monitor")

		isValid = false
	}

	for _, mon := range config.Monitors {
		if !mon.IsValid() {
			slog.Errorf("Invalid monitor configuration: %s", mon.Name)

			isValid = false
		}

		// Every alert name a monitor refers to, for both the up and the down
		// state, has to resolve to a configured alert.
		for _, isUp := range []bool{true, false} {
			for _, alertName := range mon.GetAlertNames(isUp) {
				if _, found := config.GetAlert(alertName); found {
					continue
				}

				slog.Errorf(
					"Invalid monitor configuration: %s. Unknown alert %s",
					mon.Name, alertName,
				)

				isValid = false
			}
		}
	}

	return isValid
}
// Init finishes setting up a Config after it has been decoded from file: it
// parses the check_interval strings into durations, fills in per-monitor alert
// settings from the top-level defaults, and builds all alert templates.
// The first error encountered is returned.
func (config *Config) Init() (err error) {
	if config.CheckInterval, err = time.ParseDuration(config.CheckIntervalStr); err != nil {
		return fmt.Errorf("failed to parse top level check_interval duration: %w", err)
	}

	for _, mon := range config.Monitors {
		// TODO: Move this to a Monitor.Init() method

		// A monitor may carry its own check_interval string; parse it when present.
		if mon.CheckIntervalStr != nil {
			if mon.CheckInterval, err = time.ParseDuration(*mon.CheckIntervalStr); err != nil {
				return fmt.Errorf("failed to parse check_interval duration for monitor %s: %w", mon.Name, err)
			}
		}

		// An unset (zero) AlertAfter takes the configured default, or 1 when
		// no default was given.
		if mon.AlertAfter == 0 {
			if config.DefaultAlertAfter != nil {
				mon.AlertAfter = *config.DefaultAlertAfter
			} else {
				mon.AlertAfter = 1
			}
		}

		// The remaining alert settings inherit the top-level defaults only
		// when the monitor leaves them unset.
		if mon.AlertEvery == nil {
			mon.AlertEvery = config.DefaultAlertEvery
		}

		if mon.AlertDown == nil {
			mon.AlertDown = config.DefaultAlertDown
		}

		if mon.AlertUp == nil {
			mon.AlertUp = config.DefaultAlertUp
		}
	}

	return config.BuildAllTemplates()
}
// LoadConfig will read config from the given path and parse it
func LoadConfig(filePath string) (config Config, err error) {
err = hclsimple.DecodeFile(filePath, nil, &config)
data, err := ioutil.ReadFile(filePath)
if err != nil {
return
}
slog.Debugf("Config values:\n%v\n", config)
// Finish initializing configuration
if err = config.Init(); err != nil {
err = yaml.Unmarshal(data, &config)
if err != nil {
return
}
if LogDebug {
log.Printf("DEBUG: Config values:\n%v\n", config)
}
if !config.IsValid() {
err = errInvalidConfig
err = errors.New("Invalid configuration")
return
}
return config, err
// Finish initializing configuration
err = config.Init()
return
}
+48 -55
View File
@@ -1,9 +1,8 @@
package main_test
package main
import (
"log"
"testing"
m "git.iamthefij.com/iamthefij/minitor-go"
)
func TestLoadConfig(t *testing.T) {
@@ -12,92 +11,86 @@ func TestLoadConfig(t *testing.T) {
expectErr bool
name string
}{
{"./test/valid-config.yml", false, "Valid config file"},
{"./test/does-not-exist", true, "Invalid config path"},
{"./test/invalid-config-missing-alerts.hcl", true, "Invalid config missing alerts"},
{"./test/invalid-config-type.hcl", true, "Invalid config type for key"},
{"./test/invalid-config-unknown-alert.hcl", true, "Invalid config unknown alert"},
{"./test/valid-config-default-values.hcl", false, "Valid config file with default values"},
{"./test/valid-config.hcl", false, "Valid config file"},
{"./test/invalid-config-type.yml", true, "Invalid config type for key"},
{"./test/invalid-config-missing-alerts.yml", true, "Invalid config missing alerts"},
{"./test/invalid-config-unknown-alert.yml", true, "Invalid config unknown alert"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
_, err := m.LoadConfig(c.configPath)
log.Printf("Testing case %s", c.name)
_, err := LoadConfig(c.configPath)
hasErr := (err != nil)
if hasErr != c.expectErr {
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
// TestMultiLineConfig is a more complicated test stepping through the parsing
// and execution of multi-line strings presented in YAML
func TestMultiLineConfig(t *testing.T) {
t.Parallel()
config, err := m.LoadConfig("./test/valid-verify-multi-line.hcl")
log.Println("Testing multi-line string config")
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
if err != nil {
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
}
t.Run("Test Monitor with Indented Multi-Line String", func(t *testing.T) {
// Verify indented heredoc is as expected
expected := "echo 'Some string with stuff'\necho \"<angle brackets>\"\nexit 1\n"
actual := config.Monitors[0].ShellCommand
log.Println("-----")
log.Println("TestMultiLineConfig(parse > string)")
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
actual := config.Monitors[0].Command.ShellCommand
if expected != actual {
t.Error("Heredoc mismatch")
t.Errorf("string expected=`%v`", expected)
t.Errorf("string actual =`%v`", actual)
t.Errorf("TestMultiLineConfig(>) failed")
t.Logf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
}
// Run the monitor and verify the output
log.Println("-----")
log.Println("TestMultiLineConfig(execute > string)")
_, notice := config.Monitors[0].Check()
if notice == nil {
t.Fatal("Did not receive an alert notice and should have")
t.Fatalf("Did not receive an alert notice")
}
// Verify the output of the monitor is as expected
expected = "Some string with stuff\n<angle brackets>\n"
actual = notice.LastCheckOutput
if expected != actual {
t.Error("Output mismatch")
t.Errorf("string expected=`%v`", expected)
t.Errorf("string actual =`%v`", actual)
}
})
t.Run("Test Alert with Multi-Line String", func(t *testing.T) {
alert, ok := config.GetAlert("log_shell")
if !ok {
t.Fatal("Could not find expected alert 'log_shell'")
t.Errorf("TestMultiLineConfig(execute > string) check failed")
t.Logf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
}
expected := " echo 'Some string with stuff'\n echo '<angle brackets>'\n"
actual := alert.ShellCommand
log.Println("-----")
log.Println("TestMultiLineConfig(parse | string)")
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
actual = config.Alerts["log_shell"].Command.ShellCommand
if expected != actual {
t.Error("Heredoc mismatch")
t.Errorf("string expected=`%v`", expected)
t.Errorf("string actual =`%v`", actual)
t.Errorf("TestMultiLineConfig(|) failed")
t.Logf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
}
actual, err = alert.Send(m.AlertNotice{})
log.Println("-----")
log.Println("TestMultiLineConfig(execute | string)")
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
if err != nil {
t.Fatal("Execution of alert failed")
t.Errorf("Execution of alert failed")
}
expected = "Some string with stuff\n<angle brackets>\n"
if expected != actual {
t.Error("Output mismatch")
t.Errorf("string expected=`%v`", expected)
t.Errorf("string actual =`%v`", actual)
t.Errorf("TestMultiLineConfig(execute | string) check failed")
t.Logf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
}
})
}
+3 -20
View File
@@ -1,25 +1,8 @@
module git.iamthefij.com/iamthefij/minitor-go
go 1.20
go 1.12
require (
git.iamthefij.com/iamthefij/slog v1.3.0
github.com/hashicorp/hcl/v2 v2.11.1
github.com/prometheus/client_golang v1.19.0
)
require (
github.com/agext/levenshtein v1.2.1 // indirect
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect
github.com/prometheus/client_model v0.5.0 // indirect
github.com/prometheus/common v0.48.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect
github.com/zclconf/go-cty v1.8.0 // indirect
golang.org/x/sys v0.16.0 // indirect
golang.org/x/text v0.14.0 // indirect
google.golang.org/protobuf v1.32.0 // indirect
github.com/prometheus/client_golang v1.2.1
gopkg.in/yaml.v2 v2.2.4
)
+69 -68
View File
@@ -1,77 +1,78 @@
git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8=
github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw=
github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA=
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/hashicorp/hcl/v2 v2.11.1 h1:yTyWcXcm9XB0TEkyU/JCRU6rYy4K+mgLtzn2wlrJbcc=
github.com/hashicorp/hcl/v2 v2.11.1/go.mod h1:FwWsfWEjyV/CMj8s/gqAuiviY72rJ1/oayI9WftqcKg=
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4=
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE=
github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
github.com/prometheus/client_golang v1.2.1 h1:JnMpQc6ppsNgw9QPAGF6Dod479itz7lvlsMzzNayLOI=
github.com/prometheus/client_golang v1.2.1/go.mod h1:XMU6Z2MjaRKVu/dC1qupJI9SiNkDYzz3xecMgSW/F+U=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLyCY=
github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.0.5 h1:3+auTFlqw+ZaQYJARz6ArODtkaIwtvBTx3N2NehQlL8=
github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8=
github.com/zclconf/go-cty v1.8.0 h1:s4AvqaeQzJIu3ndv4gVIhplVD0krU+bgrcLSVUnaWuA=
github.com/zclconf/go-cty v1.8.0/go.mod h1:vVKLxnk3puL4qRAv72AO+W99LUD4da90g3uUAzyuvAk=
github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b/go.mod h1:ZRKQfBXbGkpdV6QMzT3rU1kSTAnfu1dO8dPKjYprgj8=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
+48 -94
View File
@@ -1,16 +1,16 @@
package main
import (
"errors"
"flag"
"fmt"
"strings"
"log"
"time"
"git.iamthefij.com/iamthefij/slog"
)
var (
// LogDebug will control whether debug messages should be logged
LogDebug = false
// ExportMetrics will track whether or not we want to export metrics to prometheus
ExportMetrics = false
// MetricsPort is the port to expose metrics on
@@ -20,144 +20,98 @@ var (
// version of minitor being run
version = "dev"
errUnknownAlert = errors.New("unknown alert")
)
func SendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
func checkMonitors(config *Config) error {
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, success)
Metrics.CountCheck(monitor.Name, success, hasAlert)
// Should probably consider refactoring everything below here
if alertNotice != nil {
if LogDebug {
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
}
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
if alertNames == nil {
// This should only happen for a recovery alert. AlertDown is validated not empty
slog.Warningf(
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
log.Printf(
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
alertNotice.MonitorName, alertNotice.IsUp,
)
return nil
}
for _, alertName := range alertNames {
if alert, ok := config.GetAlert(alertName); ok {
if alert, ok := config.Alerts[alertName]; ok {
output, err := alert.Send(*alertNotice)
if err != nil {
slog.Errorf(
"Alert '%s' failed. result=%v: output=%s",
log.Printf(
"ERROR: Alert '%s' failed. result=%v: output=%s",
alert.Name,
err,
output,
)
return err
return fmt.Errorf(
"Unsuccessfully triggered alert '%s'. "+
"Crashing to avoid false negatives: %v",
alert.Name,
err,
)
}
// Count alert metrics
Metrics.CountAlert(monitor.Name, alert.Name)
} else {
// This case should never actually happen since we validate against it
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
}
}
return nil
}
func CheckMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
if alertNotice != nil {
err := SendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, exit early and bubble it up
if err != nil {
return err
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
}
}
}
}
return nil
}
// SendStartupAlerts sends a test notice through each of the named alerts, in
// order. It stops and returns an error as soon as a name does not match a
// configured alert (wrapping errUnknownAlert) or an alert fails to send.
func SendStartupAlerts(config *Config, alertNames []string) error {
	for _, name := range alertNames {
		alert, found := config.GetAlert(name)
		if !found {
			return fmt.Errorf("unknown alert %s: %w", name, errUnknownAlert)
		}

		// Notice describing a healthy state, labelled as a first-run test.
		notice := AlertNotice{
			AlertCount:      0,
			FailureCount:    0,
			IsUp:            true,
			LastSuccess:     time.Now(),
			MonitorName:     fmt.Sprintf("First Run Alert Test: %s", alert.Name),
			LastCheckOutput: "",
		}

		if _, err := alert.Send(notice); err != nil {
			return err
		}
	}

	return nil
}
func main() {
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
// Get debug flag
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
flag.Parse()
// Print version if flag is provided
if *showVersion {
fmt.Println("Minitor version:", version)
log.Println("Minitor version:", version)
return
}
// Load configuration
config, err := LoadConfig(*configPath)
slog.OnErrFatalf(err, "Error loading config: %v", err)
config, err := LoadConfig("config.yml")
if err != nil {
log.Fatalf("Error loading config: %v", err)
}
// Serve metrics exporter, if specified
if ExportMetrics {
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
log.Println("INFO: Exporting metrics to Prometheus")
go ServeMetrics()
}
if *startupAlerts != "" {
alertNames := strings.Split(*startupAlerts, ",")
err = SendStartupAlerts(&config, alertNames)
slog.OnErrPanicf(err, "Error running startup alerts")
}
// Start main loop
for {
err = CheckMonitors(&config)
slog.OnErrPanicf(err, "Error checking monitors")
err = checkMonitors(&config)
if err != nil {
panic(err)
}
time.Sleep(config.CheckInterval)
sleepTime := time.Duration(config.CheckInterval) * time.Second
time.Sleep(sleepTime)
}
}
+85 -163
View File
@@ -1,192 +1,114 @@
package main_test
package main
import (
"testing"
import "testing"
m "git.iamthefij.com/iamthefij/minitor-go"
)
// Ptr returns a pointer to a fresh copy of v. It is convenient for filling
// pointer-typed fields from literals.
func Ptr[T any](v T) *T {
	p := new(T)
	*p = v

	return p
}
// TestCheckMonitors tests the checkMonitors function
// It also tests results for potentially invalid configuration. For example, no alerts
func TestCheckMonitors(t *testing.T) {
cases := []struct {
config m.Config
expectFailureError bool
expectRecoverError bool
config Config
expectErr bool
name string
}{
{
config: m.Config{
CheckIntervalStr: "1s",
Monitors: []*m.Monitor{
config: Config{},
expectErr: false,
name: "Empty",
},
{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Success",
},
},
},
expectFailureError: false,
expectRecoverError: false,
name: "No alerts",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Monitors: []*m.Monitor{
{
Name: "Failure",
AlertDown: []string{"unknown"},
AlertUp: []string{"unknown"},
AlertAfter: 1,
},
},
},
expectFailureError: true,
expectRecoverError: true,
name: "Unknown alerts",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Monitors: []*m.Monitor{
{
Name: "Failure",
AlertDown: []string{"good"},
AlertUp: []string{"good"},
AlertAfter: 1,
},
},
Alerts: []*m.Alert{{
Name: "good",
Command: []string{"true"},
}},
},
expectFailureError: false,
expectRecoverError: false,
name: "Successful alert",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Monitors: []*m.Monitor{
{
Name: "Failure",
AlertDown: []string{"bad"},
AlertUp: []string{"bad"},
AlertAfter: 1,
},
},
Alerts: []*m.Alert{{
Name: "bad",
Command: []string{"false"},
}},
},
expectFailureError: true,
expectRecoverError: true,
name: "Failing alert",
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.config.Init()
if err != nil {
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
}
for _, check := range []struct {
shellCmd string
name string
expectErr bool
}{
{"false", "Failure", c.expectFailureError}, {"true", "Success", c.expectRecoverError},
} {
// Set the shell command for this check
c.config.Monitors[0].ShellCommand = check.shellCmd
// Run the check
err = m.CheckMonitors(&c.config)
// Check the results
if err == nil && check.expectErr {
t.Errorf("checkMonitors(%s:%s): Expected error, the code did not error", c.name, check.name)
} else if err != nil && !check.expectErr {
t.Errorf("checkMonitors(%s:%s): Did not expect an error, but we got one anyway: %v", c.name, check.name, err)
}
}
})
}
}
func TestFirstRunAlerts(t *testing.T) {
cases := []struct {
config m.Config
expectErr bool
startupAlerts []string
name string
}{
{
config: m.Config{
CheckIntervalStr: "1s",
},
expectErr: true,
startupAlerts: []string{"missing"},
name: "Unknown",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Alerts: []*m.Alert{
{
Name: "good",
Command: []string{"true"},
Command: CommandOrShell{Command: []string{"true"}},
},
},
},
expectErr: false,
startupAlerts: []string{"good"},
name: "Successful alert",
name: "Monitor success, no alerts",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Alerts: []*m.Alert{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertAfter: 1,
},
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"unknown"},
AlertAfter: 1,
},
},
},
expectErr: false,
name: "Monitor failure, no and unknown alerts",
},
{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"ls"}},
alertCount: 1,
},
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
alertCount: 1,
},
},
},
expectErr: false,
name: "Monitor recovery, no alerts",
},
{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"good"},
AlertAfter: 1,
},
},
Alerts: map[string]*Alert{
"good": &Alert{
Command: CommandOrShell{Command: []string{"true"}},
},
},
},
expectErr: false,
name: "Monitor failure, successful alert",
},
{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"},
AlertAfter: 1,
},
},
Alerts: map[string]*Alert{
"bad": &Alert{
Name: "bad",
Command: []string{"false"},
Command: CommandOrShell{Command: []string{"false"}},
},
},
},
expectErr: true,
startupAlerts: []string{"bad"},
name: "Failed alert",
name: "Monitor failure, bad alert",
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.config.Init()
if err != nil {
t.Errorf("sendFirstRunAlerts(%s): unexpected error reading config: %v", c.name, err)
}
err = m.SendStartupAlerts(&c.config, c.startupAlerts)
c.config.Init()
err := checkMonitors(&c.config)
if err == nil && c.expectErr {
t.Errorf("sendFirstRunAlerts(%s): Expected error, the code did not error", c.name)
} else if err != nil && !c.expectErr {
t.Errorf("sendFirstRunAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
}
})
}
}
+25
View File
@@ -0,0 +1,25 @@
# Multi-architecture image manifest template.
# The target image is tagged with build.tag (leading "v" trimmed) when a tag
# is present, otherwise with "latest".
# NOTE(review): build.tag / build.tags are supplied by whatever renders this
# template — presumably the CI manifest step; confirm against the pipeline.
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}}
# Optional additional tags, emitted only when build.tags is set.
{{#if build.tags}}
tags:
{{#each build.tags}}
- {{this}}
{{/each}}
{{/if}}
# One entry per platform-specific image; each image name is suffixed with
# its platform and, for tagged builds, prefixed with the trimmed tag.
manifests:
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64
platform:
architecture: amd64
os: linux
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64
platform:
architecture: arm64
os: linux
variant: v8
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm
platform:
architecture: arm
os: linux
variant: v7
+1 -17
View File
@@ -19,7 +19,6 @@ import (
type MinitorMetrics struct {
alertCount *prometheus.CounterVec
checkCount *prometheus.CounterVec
checkTime *prometheus.GaugeVec
monitorStatus *prometheus.GaugeVec
}
@@ -41,13 +40,6 @@ func NewMetrics() *MinitorMetrics {
},
[]string{"monitor", "status", "is_alert"},
),
checkTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_check_milliseconds",
Help: "Time in miliseconds that a check ran for",
},
[]string{"monitor", "status"},
),
monitorStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_monitor_up_count",
@@ -60,7 +52,6 @@ func NewMetrics() *MinitorMetrics {
// Register newly created metrics
prometheus.MustRegister(metrics.alertCount)
prometheus.MustRegister(metrics.checkCount)
prometheus.MustRegister(metrics.checkTime)
prometheus.MustRegister(metrics.monitorStatus)
return metrics
@@ -72,12 +63,11 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
if isUp {
val = 1.0
}
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
}
// CountCheck counts the result of a particular Monitor check
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
status := "failure"
if isSuccess {
status = "success"
@@ -91,10 +81,6 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int
metrics.checkCount.With(
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
).Inc()
metrics.checkTime.With(
prometheus.Labels{"monitor": monitor, "status": status},
).Set(float64(ms))
}
// CountAlert counts an alert
@@ -110,8 +96,6 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
// ServeMetrics starts an http server with a Prometheus metrics handler
func ServeMetrics() {
http.Handle("/metrics", promhttp.Handler())
host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil)
}
+57 -76
View File
@@ -1,67 +1,46 @@
package main
import (
"log"
"math"
"os/exec"
"time"
"git.iamthefij.com/iamthefij/slog"
)
// Monitor represents a particular periodic check of a command
type Monitor struct { //nolint:maligned
type Monitor struct {
// Config values
CheckIntervalStr *string `hcl:"check_interval,optional"`
CheckInterval time.Duration
Name string `hcl:"name,label"`
AlertAfter int `hcl:"alert_after,optional"`
AlertEvery *int `hcl:"alert_every,optional"`
AlertDown []string `hcl:"alert_down,optional"`
AlertUp []string `hcl:"alert_up,optional"`
Command []string `hcl:"command,optional"`
ShellCommand string `hcl:"shell_command,optional"`
Name string
Command CommandOrShell
AlertDown []string `yaml:"alert_down"`
AlertUp []string `yaml:"alert_up"`
CheckInterval float64 `yaml:"check_interval"`
AlertAfter int16 `yaml:"alert_after"`
AlertEvery int16 `yaml:"alert_every"`
// Other values
alertCount int
failureCount int
lastCheck time.Time
lastSuccess time.Time
lastOutput string
lastCheckDuration time.Duration
alertCount int16
failureCount int16
lastSuccess time.Time
}
// IsValid returns a boolean indicating if the Monitor has been correctly
// configured
func (monitor Monitor) IsValid() bool {
// TODO: Refactor and return an error containing more information on what was invalid
hasCommand := len(monitor.Command) > 0
hasShellCommand := monitor.ShellCommand != ""
hasValidAlertAfter := monitor.AlertAfter > 0
hasAlertDown := len(monitor.AlertDown) > 0
hasAtLeastOneCommand := hasCommand || hasShellCommand
hasAtMostOneCommand := !(hasCommand && hasShellCommand)
return hasAtLeastOneCommand &&
hasAtMostOneCommand &&
hasValidAlertAfter &&
hasAlertDown
}
func (monitor Monitor) LastOutput() string {
return monitor.lastOutput
return (!monitor.Command.Empty() &&
monitor.getAlertAfter() > 0 &&
monitor.AlertDown != nil)
}
// ShouldCheck returns a boolean indicating if the Monitor is ready to be
// checked again
func (monitor Monitor) ShouldCheck() bool {
if monitor.lastCheck.IsZero() || monitor.CheckInterval == 0 {
if monitor.lastCheck.IsZero() {
return true
}
sinceLastCheck := time.Since(monitor.lastCheck)
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
return sinceLastCheck >= monitor.CheckInterval
}
@@ -69,22 +48,17 @@ func (monitor Monitor) ShouldCheck() bool {
// and a possible AlertNotice
func (monitor *Monitor) Check() (bool, *AlertNotice) {
var cmd *exec.Cmd
if len(monitor.Command) > 0 {
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
} else if monitor.ShellCommand != "" {
cmd = ShellCommand(monitor.ShellCommand)
if monitor.Command.Command != nil {
cmd = exec.Command(monitor.Command.Command[0], monitor.Command.Command[1:]...)
} else {
slog.Fatalf("Monitor %s has no command configured", monitor.Name)
cmd = ShellCommand(monitor.Command.ShellCommand)
}
checkStartTime := time.Now()
output, err := cmd.CombinedOutput()
monitor.lastCheck = time.Now()
monitor.lastOutput = string(output)
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
var alertNotice *AlertNotice
isSuccess := (err == nil)
if isSuccess {
alertNotice = monitor.success()
@@ -92,11 +66,17 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
alertNotice = monitor.failure()
}
slog.Debugf("Command output: %s", monitor.lastOutput)
slog.OnErrWarnf(err, "Command result: %v", err)
if LogDebug {
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
}
if err != nil {
if LogDebug {
log.Printf("DEBUG: Command result: %v", err)
}
}
slog.Infof(
"%s success=%t, alert=%t",
log.Printf(
"INFO: %s success=%t, alert=%t",
monitor.Name,
isSuccess,
alertNotice != nil,
@@ -105,22 +85,15 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
return isSuccess, alertNotice
}
// IsUp returns the status of the current monitor
func (monitor Monitor) IsUp() bool {
func (monitor Monitor) isUp() bool {
return monitor.alertCount == 0
}
// LastCheckMilliseconds gives number of milliseconds the last check ran for
func (monitor Monitor) LastCheckMilliseconds() int64 {
return monitor.lastCheckDuration.Milliseconds()
}
func (monitor *Monitor) success() (notice *AlertNotice) {
if !monitor.IsUp() {
if !monitor.isUp() {
// Alert that we have recovered
notice = monitor.createAlertNotice(true)
}
monitor.failureCount = 0
monitor.alertCount = 0
monitor.lastSuccess = time.Now()
@@ -131,36 +104,36 @@ func (monitor *Monitor) success() (notice *AlertNotice) {
func (monitor *Monitor) failure() (notice *AlertNotice) {
monitor.failureCount++
// If we haven't hit the minimum failures, we can exit
if monitor.failureCount < monitor.AlertAfter {
slog.Debugf(
"%s failed but did not hit minimum failures. "+
if monitor.failureCount < monitor.getAlertAfter() {
if LogDebug {
log.Printf(
"DEBUG: %s failed but did not hit minimum failures. "+
"Count: %v alert after: %v",
monitor.Name,
monitor.failureCount,
monitor.AlertAfter,
monitor.getAlertAfter(),
)
}
return
}
// Take number of failures after minimum
failureCount := (monitor.failureCount - monitor.AlertAfter)
failureCount := (monitor.failureCount - monitor.getAlertAfter())
// Use alert cadence to determine if we should alert
switch {
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
if monitor.AlertEvery > 0 {
// Handle integer number of failures before alerting
if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
} else if monitor.AlertEvery == 0 {
// Handle alerting on first failure only
if failureCount == 0 {
notice = monitor.createAlertNotice(false)
}
case *monitor.AlertEvery > 0:
// Handle integer number of failures before alerting
if failureCount%*monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
default:
} else {
// Handle negative numbers indicating an exponential backoff
if failureCount >= int(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
notice = monitor.createAlertNotice(false)
}
}
@@ -170,7 +143,16 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
monitor.alertCount++
}
return notice
return
}
// getAlertAfter returns the effective number of failures required before
// alerting. An AlertAfter of 0 (the int16 zero value) is treated as 1; any
// other value, including a negative one, is returned unchanged.
func (monitor Monitor) getAlertAfter() int16 {
// TODO: Come up with a better way than this method
// Zero is one!
if monitor.AlertAfter == 0 {
return 1
}
return monitor.AlertAfter
}
// GetAlertNames gives a list of alert names for a given monitor status
@@ -178,7 +160,6 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
if up {
return monitor.AlertUp
}
return monitor.AlertDown
}
+134 -120
View File
@@ -1,152 +1,159 @@
package main_test
package main
import (
"reflect"
"log"
"testing"
"time"
m "git.iamthefij.com/iamthefij/minitor-go"
)
// TestMonitorIsValid tests the Monitor.IsValid()
func TestMonitorIsValid(t *testing.T) {
cases := []struct {
monitor m.Monitor
monitor Monitor
expected bool
name string
}{
{m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, true, "Command only"},
{m.Monitor{AlertAfter: 1, ShellCommand: "echo test", AlertDown: []string{"log"}}, true, "CommandShell only"},
{m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}}, false, "No AlertDown"},
{m.Monitor{AlertAfter: 1, AlertDown: []string{"log"}}, false, "No commands"},
{m.Monitor{AlertAfter: -1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, false, "Invalid alert threshold, -1"},
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}, AlertDown: []string{"log"}}, true, "Command only"},
{Monitor{Command: CommandOrShell{ShellCommand: "echo test"}, AlertDown: []string{"log"}}, true, "CommandShell only"},
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}}, false, "No AlertDown"},
{Monitor{AlertDown: []string{"log"}}, false, "No commands"},
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}, AlertDown: []string{"log"}, AlertAfter: -1}, false, "Invalid alert threshold, -1"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
log.Printf("Testing case %s", c.name)
actual := c.monitor.IsValid()
if actual != c.expected {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
// TestMonitorShouldCheck tests the Monitor.ShouldCheck()
func TestMonitorShouldCheck(t *testing.T) {
t.Parallel()
timeNow := time.Now()
timeTenSecAgo := time.Now().Add(time.Second * -10)
timeTwentySecAgo := time.Now().Add(time.Second * -20)
// Create a monitor that should check every second and then verify it checks with some sleeps
monitor := m.Monitor{ShellCommand: "true", CheckInterval: time.Second}
if !monitor.ShouldCheck() {
t.Errorf("New monitor should be ready to check")
cases := []struct {
monitor Monitor
expected bool
name string
}{
{Monitor{}, true, "Empty"},
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
}
monitor.Check()
if monitor.ShouldCheck() {
t.Errorf("Monitor should not be ready to check after a check")
for _, c := range cases {
actual := c.monitor.ShouldCheck()
if actual != c.expected {
t.Errorf("ShouldCheck(%v), expected=%t actual=%t", c.name, c.expected, actual)
}
time.Sleep(time.Second)
if !monitor.ShouldCheck() {
t.Errorf("Monitor should be ready to check after a second")
}
}
// TestMonitorIsUp tests the Monitor.IsUp()
// TestMonitorIsUp tests the Monitor.isUp()
func TestMonitorIsUp(t *testing.T) {
t.Parallel()
// Creating a monitor that should alert after 2 failures. The monitor should be considered up until we reach two failed checks
monitor := m.Monitor{ShellCommand: "false", AlertAfter: 2}
if !monitor.IsUp() {
t.Errorf("New monitor should be considered up")
cases := []struct {
monitor Monitor
expected bool
name string
}{
{Monitor{}, true, "Empty"},
{Monitor{alertCount: 1}, false, "Has alert"},
{Monitor{alertCount: -1}, false, "Negative alerts"},
{Monitor{alertCount: 0}, true, "No alerts"},
}
monitor.Check()
if !monitor.IsUp() {
t.Errorf("Monitor should be considered up with one failure and no alerts")
for _, c := range cases {
log.Printf("Testing case %s", c.name)
actual := c.monitor.isUp()
if actual != c.expected {
t.Errorf("isUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
monitor.Check()
if monitor.IsUp() {
t.Errorf("Monitor should be considered down with one alert")
log.Println("-----")
}
}
// TestMonitorGetAlertNames tests that proper alert names are returned
func TestMonitorGetAlertNames(t *testing.T) {
cases := []struct {
monitor m.Monitor
monitor Monitor
up bool
expected []string
name string
}{
{m.Monitor{}, true, nil, "Empty up"},
{m.Monitor{}, false, nil, "Empty down"},
{m.Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"},
{m.Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"},
{Monitor{}, true, nil, "Empty up"},
{Monitor{}, false, nil, "Empty down"},
{Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"},
{Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
log.Printf("Testing case %s", c.name)
actual := c.monitor.GetAlertNames(c.up)
if !reflect.DeepEqual(actual, c.expected) {
if !EqualSliceString(actual, c.expected) {
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
// TestMonitorSuccess tests the Monitor.success()
func TestMonitorSuccess(t *testing.T) {
cases := []struct {
monitor Monitor
expectNotice bool
name string
}{
{Monitor{}, false, "Empty"},
{Monitor{alertCount: 0}, false, "No alerts"},
{Monitor{alertCount: 1}, true, "Has alert"},
}
for _, c := range cases {
log.Printf("Testing case %s", c.name)
notice := c.monitor.success()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
log.Println("-----")
}
}
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
// hitting the threshold provided by AlertAfter
func TestMonitorFailureAlertAfter(t *testing.T) {
var alertEveryOne int = 1
cases := []struct {
monitor m.Monitor
numChecks int
monitor Monitor
expectNotice bool
name string
}{
{m.Monitor{ShellCommand: "false", AlertAfter: 1}, 1, true, "Empty After 1"}, // Defaults to true because and AlertEvery default to 0
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 1, true, "Alert after 1: first failure"},
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 2, true, "Alert after 1: second failure"},
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 1, false, "Alert after 20: first failure"},
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 20, true, "Alert after 20: 20th failure"},
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 21, true, "Alert after 20: 21st failure"},
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because and AlertEvery default to 0
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
hasNotice := false
for i := 0; i < c.numChecks; i++ {
_, notice := c.monitor.Check()
hasNotice = (notice != nil)
}
log.Printf("Testing case %s", c.name)
notice := c.monitor.failure()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
@@ -154,42 +161,51 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
// on the expected intervals
func TestMonitorFailureAlertEvery(t *testing.T) {
cases := []struct {
monitor m.Monitor
expectedNotice []bool
monitor Monitor
expectNotice bool
name string
}{
{m.Monitor{ShellCommand: "false", AlertAfter: 1}, []bool{true}, "No AlertEvery set"}, // Defaults to true because AlertAfter and AlertEvery default to nil
/*
TODO: Actually found a bug in original implementation. There is an inconsistency in the way AlertAfter is treated.
For "First alert only" (ie. AlertEvery=0), it is the number of failures to ignore before alerting, so AlertAfter=1
will ignore the first failure and alert on the second failure
For other intervals (ie. AlertEvery=1), it is essentially indexed on one. Essentially making AlertAfter=1 trigger
on the first failure.
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
*/
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
// Alert first time only, after 1
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(0)}, []bool{true, false, false}, "Alert first time only after 1"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
// Alert every time, after 1
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(1)}, []bool{true, true, true}, "Alert every time after 1"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
// failure() increments failureCount before evaluating, so the third failure starts from 2
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
// Alert every other time, after 1
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(2)}, []bool{true, false, true, false}, "Alert every other time after 1"},
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
}
for _, c := range cases {
c := c
log.Printf("Testing case %s", c.name)
t.Run(c.name, func(t *testing.T) {
t.Parallel()
for i, expectNotice := range c.expectedNotice {
_, notice := c.monitor.Check()
notice := c.monitor.failure()
hasNotice := (notice != nil)
if hasNotice != expectNotice {
t.Errorf("failed %s check %d: expected=%t actual=%t", c.name, i, expectNotice, hasNotice)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
}
})
log.Println("-----")
}
}
// TestMonitorFailureExponential tests that alerts will trigger
// with an exponential backoff after repeated failures
func TestMonitorFailureExponential(t *testing.T) {
var alertEveryExp int = -1
cases := []struct {
expectNotice bool
name string
@@ -206,18 +222,17 @@ func TestMonitorFailureExponential(t *testing.T) {
// Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method
monitor := m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryExp}
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// NOTE: These tests are not parallel because they rely on the state of the Monitor
_, notice := monitor.Check()
hasNotice := (notice != nil)
log.Printf("Testing case %s", c.name)
notice := monitor.failure()
hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
@@ -228,54 +243,53 @@ func TestMonitorCheck(t *testing.T) {
hasNotice bool
lastOutput string
}
cases := []struct {
monitor m.Monitor
monitor Monitor
expect expected
name string
}{
{
m.Monitor{AlertAfter: 1, Command: []string{"echo", "success"}},
Monitor{Command: CommandOrShell{Command: []string{"echo", "success"}}},
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
"Test successful command",
},
{
m.Monitor{AlertAfter: 1, ShellCommand: "echo success"},
Monitor{Command: CommandOrShell{ShellCommand: "echo success"}},
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
"Test successful command shell",
},
{
m.Monitor{AlertAfter: 1, Command: []string{"total", "failure"}},
Monitor{Command: CommandOrShell{Command: []string{"total", "failure"}}},
expected{isSuccess: false, hasNotice: true, lastOutput: ""},
"Test failed command",
},
{
m.Monitor{AlertAfter: 1, ShellCommand: "false"},
Monitor{Command: CommandOrShell{ShellCommand: "false"}},
expected{isSuccess: false, hasNotice: true, lastOutput: ""},
"Test failed command shell",
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
log.Printf("Testing case %s", c.name)
isSuccess, notice := c.monitor.Check()
if isSuccess != c.expect.isSuccess {
t.Errorf("Check(%v) (success), expected=%t actual=%t", c.name, c.expect.isSuccess, isSuccess)
log.Printf("Case failed: %s", c.name)
}
hasNotice := (notice != nil)
if hasNotice != c.expect.hasNotice {
t.Errorf("Check(%v) (notice), expected=%t actual=%t", c.name, c.expect.hasNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
}
lastOutput := c.monitor.LastOutput()
lastOutput := c.monitor.lastOutput
if lastOutput != c.expect.lastOutput {
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
log.Printf("Case failed: %s", c.name)
}
})
log.Println("-----")
}
}
-52
View File
@@ -1,52 +0,0 @@
check_interval = "5s"
monitor "Fake Website" {
command = ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
alert_down = ["log_down", "mailgun_down", "sms_down"]
alert_up = ["log_up", "email_up"]
check_interval = "10s" # Must be at minimum the global `check_interval`
alert_after = 3
alert_every = -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
}
monitor "Real Website" {
command = ["curl", "-s", "-o", "/dev/null", "https://google.com"]
alert_down = ["log_down", "mailgun_down", "sms_down"]
alert_up = ["log_up", "email_up"]
check_interval = "5s"
alert_after = 3
alert_every = -1
}
alert "log_down" {
command = ["echo", "Minitor failure for {{.MonitorName}}"]
}
alert "log_up" {
command = ["echo", "Minitor recovery for {{.MonitorName}}"]
}
# Recovery email. Uses the same Go template placeholder as the other alerts
# in this file so the monitor name is substituted consistently.
alert "email_up" {
  command = ["sendmail", "me@minitor.mon", "Recovered: {{.MonitorName}}", "We're back!"]
}
alert "mailgun_down" {
shell_command = <<-EOF
curl -s -X POST \
-F subject="Alert! {{.MonitorName}} failed" \
-F from="Minitor <minitor@minitor.mon>" \
-F to=me@minitor.mon \
-F text="Our monitor failed" \
https://api.mailgun.net/v3/minitor.mon/messages \
-u "api:${MAILGUN_API_KEY}"
EOF
}
alert "sms_down" {
shell_command = <<-EOF
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed" \
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}" \
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages" \
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
EOF
}
+41
View File
@@ -0,0 +1,41 @@
---
# Sample Minitor configuration.
# `command` accepts either form:
#   - a list:   executed directly as a command
#   - a string: executed in a shell
check_interval: 5

monitors:
  - name: Fake Website
    command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
    alert_down: [log_down, mailgun_down, sms_down]
    alert_up: [log_up, email_up]
    check_interval: 10  # Must be at minimum the global `check_interval`
    alert_after: 3
    alert_every: -1  # Defaults to -1 for exponential backoff. 0 to disable repeating
  - name: Real Website
    command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
    alert_down: [log_down, mailgun_down, sms_down]
    alert_up: [log_up, email_up]
    check_interval: 5
    alert_after: 3
    alert_every: -1

# Alerts are keyed by the names referenced from `alert_down` / `alert_up`.
alerts:
  log_down:
    command: ["echo", "Minitor failure for {{.MonitorName}}"]
  log_up:
    command: ["echo", "Minitor recovery for {{.MonitorName}}"]
  email_up:
    command: [sendmail, "me@minitor.mon", "Recovered: {{.MonitorName}}", "We're back!"]
  mailgun_down:
    # Folded scalar: the lines below are joined with spaces into one shell command
    command: >
      curl -s -X POST
      -F subject="Alert! {{.MonitorName}} failed"
      -F from="Minitor <minitor@minitor.mon>"
      -F to=me@minitor.mon
      -F text="Our monitor failed"
      https://api.mailgun.net/v3/minitor.mon/messages
      -u "api:${MAILGUN_API_KEY}"
  sms_down:
    command: >
      curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed"
      -F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
      "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
      -u "${ACCOUNT_SID}:${AUTH_TOKEN}"
-6
View File
@@ -1,6 +0,0 @@
#! /bin/sh
# Used for a basic HTTP health check
# Avoids output from non-errors and will fail if the HTTP response is unsuccessful
curl --silent --show-error --fail -o /dev/null "$@"
+3 -15
View File
@@ -11,7 +11,6 @@ set -e
# To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:=socket}"
container_name="$1"
num_log_lines="$2"
# Curls Docker either using a socket or URL
function curl_docker {
@@ -32,32 +31,21 @@ function get_container_id {
# Returns container JSON
function inspect_container {
local container_id="$1"
local container_id=$1
curl_docker "containers/$container_id/json"
}
# Gets some lines from docker log
function get_logs {
container_id="$1"
num_lines="$2"
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
if [ -z "$container_name" ]; then
echo "Usage: $0 container_name [num_log_lines]"
echo "Usage: $0 container_name"
echo "Will exit with the last status code of continer with provided name"
exit 1
fi
container_id=$(get_container_id "$container_name")
container_id=$(get_container_id $container_name)
if [ -z "$container_id" ]; then
echo "ERROR: Could not find container with name: $container_name"
exit 1
fi
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
if [ -n "$num_log_lines" ]; then
get_logs "$container_id" "$num_log_lines"
fi
exit "$exit_code"
+1 -13
View File
@@ -11,7 +11,6 @@ set -e
# To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:=socket}"
container_name="$1"
num_log_lines="$2"
# Curls Docker either using a socket or URL
function curl_docker {
@@ -36,15 +35,8 @@ function inspect_container {
curl_docker "containers/$container_id/json"
}
# Gets some lines from docker log
function get_logs {
container_id="$1"
num_lines="$2"
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
if [ -z "$container_name" ]; then
echo "Usage: $0 container_name [num_log_lines]"
echo "Usage: $0 container_name"
echo "Will return results of healthcheck for continer with provided name"
exit 1
fi
@@ -56,10 +48,6 @@ if [ -z "$container_id" ]; then
fi
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
if [ -n "$num_log_lines" ]; then
get_logs "$container_id" "$num_log_lines"
fi
case "$health" in
null)
echo "No healthcheck results"
-7
View File
@@ -1,7 +0,0 @@
check_interval = "1s"
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = [ "alert_down", "log_shell", "log_command" ]
alert_every = 0
}
+8
View File
@@ -0,0 +1,8 @@
check_interval: 1
monitors:
- name: Command
command: ['echo', '$PATH']
alert_down: [ 'alert_down', 'log_shell', 'log_command' ]
# alert_every: -1
alert_every: 0
-1
View File
@@ -1 +0,0 @@
check_interval = "woops, I'm not an int!"
+1
View File
@@ -0,0 +1 @@
check_interval: woops, I'm not an int!
-12
View File
@@ -1,12 +0,0 @@
check_interval = "1s"
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = ["not_log"]
alert_every = 0
}
alert "log" {
command = ["true"]
}
+13
View File
@@ -0,0 +1,13 @@
---
# Top-level check interval.
check_interval: 1

# alert_down names 'not_log'; the only alert defined below is 'log'.
# NOTE(review): presumably a fixture for the unknown-alert validation path - confirm.
monitors:
  - name: Command
    command:
      - 'echo'
      - '$PATH'
    alert_down:
      - 'not_log'
    # alert_every: -1
    alert_every: 0

# The single alert defined in this file; 'true' stays quoted so it is read
# as a string, not a boolean.
alerts:
  log:
    command:
      - 'true'
-11
View File
@@ -1,11 +0,0 @@
# Top-level settings. default_alert_down and default_alert_after are given
# here, while the monitor below sets neither alert_down nor alert_after.
# NOTE(review): presumably exercises falling back to the defaults - confirm.
check_interval      = "1s"
default_alert_down  = ["log_command"]
default_alert_after = 1

# Monitor that only specifies its command.
monitor "Command" {
  command = ["echo", "$PATH"]
}

# The alert named by default_alert_down.
alert "log_command" {
  command = ["echo", "default", "'command!!!'", "{{.MonitorName}}"]
}
-29
View File
@@ -1,29 +0,0 @@
# Top-level check interval; each monitor below also sets its own check_interval.
check_interval = "1s"
# Alert written as an argument list. The last argument contains {{.MonitorName}},
# presumably a template field filled in by the application - confirm.
alert "log_command" {
command = ["echo", "regular", "'command!!!'", "{{.MonitorName}}"]
}
# Alert written as a single shell_command string instead of an argument list.
alert "log_shell" {
shell_command = "echo \"Failure on {{.MonitorName}} User is $USER\""
}
# Monitor using the argument-list `command` form; references both alerts above.
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = ["log_command", "log_shell"]
alert_every = 2
check_interval = "10s"
}
# Monitor using a multi-line shell_command heredoc; the script ends with `exit 1`.
monitor "Shell" {
shell_command = <<-EOF
echo 'Some string with stuff'
echo 'another line'
echo $PATH
exit 1
EOF
alert_down = ["log_command", "log_shell"]
alert_after = 5
alert_every = 0
check_interval = "1m"
}
+5 -7
View File
@@ -1,25 +1,23 @@
---
check_interval: 1s
check_interval: 1
monitors:
- name: Command
command: ["echo", "$PATH"]
alert_down: ["log_command", "log_shell"]
command: ['echo', '$PATH']
alert_down: ['log_command', 'log_shell']
alert_every: 0
check_interval: 10s
- name: Shell
command: >
echo 'Some string with stuff';
echo 'another line';
echo $PATH;
exit 1
alert_down: ["log_command", "log_shell"]
alert_down: ['log_command', 'log_shell']
alert_after: 5
alert_every: 0
check_interval: 1m
alerts:
log_command:
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
log_shell:
command: echo "Failure on {{.MonitorName}} User is $USER"
-19
View File
@@ -1,19 +0,0 @@
# Top-level check interval.
check_interval = "1s"
# Monitor whose shell_command heredoc echoes text containing angle brackets
# and then ends with `exit 1`; it references the log_shell alert below.
monitor "Shell" {
shell_command = <<-EOF
echo 'Some string with stuff'
echo "<angle brackets>"
exit 1
EOF
alert_down = ["log_shell"]
alert_after = 1
alert_every = 0
}
# Alert whose shell_command heredoc echoes the same two strings.
# Note it uses `<<EOF` while the monitor above uses `<<-EOF`.
alert "log_shell" {
shell_command = <<EOF
echo 'Some string with stuff'
echo '<angle brackets>'
EOF
}
+18
View File
@@ -0,0 +1,18 @@
---
# Top-level check interval.
check_interval: 1
# Single monitor whose command is a folded (`>`) block scalar: its lines are
# joined with spaces, so the statements are separated by `;`. Ends with `exit 1`.
monitors:
- name: Shell
command: >
echo 'Some string with stuff';
echo "<angle brackets>";
exit 1
alert_down: ['log_shell']
alert_after: 1
alert_every: 0
# Alert whose command is a literal (`|`) block scalar, so the two echo
# statements stay on separate lines.
alerts:
log_shell:
command: |
echo 'Some string with stuff'
echo '<angle brackets>'
+1 -3
View File
@@ -8,7 +8,7 @@ import (
// ShellCommand builds an *exec.Cmd that runs the given string through
// `sh -c`. Leading and trailing whitespace is trimmed from the string before
// it is handed to the shell. The command is only constructed, not started.
func ShellCommand(command string) *exec.Cmd {
	script := strings.TrimSpace(command)
	return exec.Command("sh", "-c", script)
}
@@ -17,12 +17,10 @@ func EqualSliceString(a, b []string) bool {
if len(a) != len(b) {
return false
}
for i, val := range a {
if val != b[i] {
return false
}
}
return true
}
+1 -10
View File
@@ -1,9 +1,6 @@
package main
import (
"fmt"
"testing"
)
import "testing"
func TestUtilEqualSliceString(t *testing.T) {
cases := []struct {
@@ -24,11 +21,6 @@ func TestUtilEqualSliceString(t *testing.T) {
}
for _, c := range cases {
c := c
t.Run(fmt.Sprintf("%v %v", c.a, c.b), func(t *testing.T) {
t.Parallel()
actual := EqualSliceString(c.a, c.b)
if actual != c.expected {
t.Errorf(
@@ -36,6 +28,5 @@ func TestUtilEqualSliceString(t *testing.T) {
c.a, c.b, c.expected, actual,
)
}
})
}
}