Compare commits
68 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7cac6c94d7 | ||
|
|
e0af17a599 | ||
|
|
a0a6b8199a | ||
|
|
9ec62528d9 | ||
|
|
32745c816c | ||
|
|
3f6c8f5a22 | ||
|
|
67d7e0574e | ||
|
|
7604138c9e | ||
|
|
f58b4c1495 | ||
|
|
6a2b44673e | ||
|
|
01cca50532 | ||
|
|
2789aa63e4 | ||
|
|
37db4b2db0 | ||
|
|
41a1dbeceb | ||
|
|
c02d64d674 | ||
|
|
46f4561bea | ||
|
|
a1e0e9698b | ||
|
|
ded4e129a1 | ||
|
|
95cb24ac04 | ||
|
|
e6447b615f | ||
|
|
2680eabd40 | ||
|
|
23340e823f | ||
|
|
99b8723abc | ||
|
|
7d87c3d036 | ||
|
|
deec04bf0d | ||
|
|
958446050f | ||
|
|
88e94642d9 | ||
|
|
bc83a51907 | ||
|
|
08b8932331 | ||
|
|
9072d97bb8 | ||
|
|
cdd8a69669 | ||
|
|
3c14a02770 | ||
|
|
328ea83c25 | ||
|
|
ce986e8d1d | ||
|
|
31a4b484bf | ||
|
|
49e3635819 | ||
|
|
444d060736 | ||
|
|
860c2cdf43 | ||
|
|
befea7375f | ||
|
|
04395fa693 | ||
|
|
bdf7355fa7 | ||
|
|
30c2c7d6b2 | ||
|
|
5f250f17a8 | ||
|
|
fda9e1bfc3 | ||
|
|
f0e179851f | ||
|
|
9e124803da | ||
|
|
2c4543a7bc | ||
|
|
a1b906b94a | ||
|
|
0a5be250b5 | ||
|
|
88f77aa27c | ||
|
|
67c2375bba | ||
|
|
aad9eaa32f | ||
|
|
5dc5ba5257 | ||
|
|
4aff294739 | ||
|
|
0684b15a44 | ||
|
|
d3826dacde | ||
|
|
f8e40c643c | ||
|
|
cffbbd734a | ||
|
|
ad6f3be6ec | ||
|
|
ae30f477f7 | ||
|
|
9dcd8ebf12 | ||
|
|
11af700618 | ||
|
|
00029a6327 | ||
|
|
9c21880efa | ||
|
|
8b0d3b65cf | ||
|
|
25c5179d3d | ||
|
|
eb7ad0b25e | ||
|
|
3b963f420f |
+50
-64
@@ -3,31 +3,15 @@ kind: pipeline
|
||||
name: test
|
||||
|
||||
steps:
|
||||
|
||||
- name: test
|
||||
image: golang:1.12
|
||||
image: golang:1.20
|
||||
environment:
|
||||
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||
commands:
|
||||
- make build
|
||||
- make test
|
||||
|
||||
- name: check
|
||||
image: python:3
|
||||
commands:
|
||||
- pip install pre-commit==1.20.0
|
||||
- make check
|
||||
|
||||
- name: notify
|
||||
image: drillster/drone-email
|
||||
settings:
|
||||
host:
|
||||
from_secret: SMTP_HOST
|
||||
username:
|
||||
from_secret: SMTP_USER
|
||||
password:
|
||||
from_secret: SMTP_PASS
|
||||
from: drone@iamthefij.com
|
||||
when:
|
||||
status: [changed, failure]
|
||||
image: iamthefij/drone-pre-commit:personal
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
@@ -46,69 +30,71 @@ trigger:
|
||||
|
||||
steps:
|
||||
- name: build all binaries
|
||||
image: golang:1.12
|
||||
image: golang:1.20
|
||||
environment:
|
||||
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||
commands:
|
||||
- make all
|
||||
|
||||
- name: push image - arm
|
||||
image: plugins/docker
|
||||
settings:
|
||||
repo: iamthefij/minitor-go
|
||||
auto_tag: true
|
||||
auto_tag_suffix: linux-arm
|
||||
username:
|
||||
from_secret: docker_username
|
||||
password:
|
||||
from_secret: docker_password
|
||||
build_args:
|
||||
- ARCH=arm
|
||||
- REPO=arm32v7
|
||||
- name: compress binaries for release
|
||||
image: ubuntu
|
||||
commands:
|
||||
- find ./dist -type f -executable -execdir tar -czvf {}.tar.gz {} \;
|
||||
when:
|
||||
event: tag
|
||||
|
||||
- name: push image - arm64
|
||||
image: plugins/docker
|
||||
- name: upload gitea release
|
||||
image: plugins/gitea-release
|
||||
settings:
|
||||
repo: iamthefij/minitor-go
|
||||
auto_tag: true
|
||||
auto_tag_suffix: linux-arm64
|
||||
username:
|
||||
from_secret: docker_username
|
||||
password:
|
||||
from_secret: docker_password
|
||||
build_args:
|
||||
- ARCH=arm64
|
||||
- REPO=arm64v8
|
||||
title: ${DRONE_TAG}
|
||||
files: dist/*.tar.gz
|
||||
checksum:
|
||||
- md5
|
||||
- sha1
|
||||
- sha256
|
||||
- sha512
|
||||
base_url:
|
||||
from_secret: gitea_base_url
|
||||
api_key:
|
||||
from_secret: gitea_token
|
||||
when:
|
||||
event: tag
|
||||
|
||||
- name: push image - amd64
|
||||
image: plugins/docker
|
||||
- name: Build and publish docker images
|
||||
image: thegeeklab/drone-docker-buildx
|
||||
settings:
|
||||
repo: iamthefij/minitor-go
|
||||
auto_tag: true
|
||||
auto_tag_suffix: linux-amd64
|
||||
platforms:
|
||||
- linux/amd64
|
||||
- linux/arm64
|
||||
- linux/arm
|
||||
username:
|
||||
from_secret: docker_username
|
||||
password:
|
||||
from_secret: docker_password
|
||||
|
||||
- name: publish manifest
|
||||
image: plugins/manifest
|
||||
settings:
|
||||
spec: manifest.tmpl
|
||||
auto_tag: true
|
||||
ignore_missing: true
|
||||
username:
|
||||
from_secret: docker_username
|
||||
password:
|
||||
from_secret: docker_password
|
||||
---
|
||||
kind: pipeline
|
||||
name: notify
|
||||
|
||||
depends_on:
|
||||
- test
|
||||
- publish
|
||||
|
||||
trigger:
|
||||
status:
|
||||
- failure
|
||||
|
||||
steps:
|
||||
|
||||
- name: notify
|
||||
image: drillster/drone-email
|
||||
settings:
|
||||
host:
|
||||
from_secret: SMTP_HOST
|
||||
from_secret: SMTP_HOST # pragma: whitelist secret
|
||||
username:
|
||||
from_secret: SMTP_USER
|
||||
from_secret: SMTP_USER # pragma: whitelist secret
|
||||
password:
|
||||
from_secret: SMTP_PASS
|
||||
from_secret: SMTP_PASS # pragma: whitelist secret
|
||||
from: drone@iamthefij.com
|
||||
when:
|
||||
status: [changed, failure]
|
||||
|
||||
Vendored
+2
-2
@@ -17,5 +17,5 @@ config.yml
|
||||
|
||||
# Output binary
|
||||
minitor
|
||||
minitor-linux-*
|
||||
minitor-darwin-amd64
|
||||
minitor-go
|
||||
dist/
|
||||
|
||||
@@ -0,0 +1,36 @@
|
||||
---
|
||||
linters:
|
||||
enable:
|
||||
- errname
|
||||
- errorlint
|
||||
- exhaustive
|
||||
- gofumpt
|
||||
- goimports
|
||||
- gomnd
|
||||
- goprintffuncname
|
||||
- misspell
|
||||
- tagliatelle
|
||||
- tenv
|
||||
- testpackage
|
||||
- thelper
|
||||
- tparallel
|
||||
- unconvert
|
||||
- wrapcheck
|
||||
- wsl
|
||||
disable:
|
||||
- gochecknoglobals
|
||||
|
||||
linters-settings:
|
||||
gosec:
|
||||
excludes:
|
||||
- G204
|
||||
tagliatelle:
|
||||
case:
|
||||
rules:
|
||||
yaml: snake
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- path: _test\.go
|
||||
linters:
|
||||
- gosec
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v2.4.0
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-yaml
|
||||
@@ -10,10 +10,11 @@ repos:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-merge-conflict
|
||||
- repo: git://github.com/dnephin/pre-commit-golang
|
||||
rev: v0.3.5
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.52.2
|
||||
hooks:
|
||||
- id: go-fmt
|
||||
- id: go-imports
|
||||
# - id: gometalinter
|
||||
# - id: golangci-lint
|
||||
- id: golangci-lint
|
||||
- repo: https://github.com/hadolint/hadolint
|
||||
rev: v2.12.1-beta
|
||||
hooks:
|
||||
- id: hadolint
|
||||
|
||||
+5
-9
@@ -1,15 +1,10 @@
|
||||
ARG REPO=library
|
||||
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
|
||||
FROM ${REPO}/alpine:3.10
|
||||
|
||||
# Copying all qemu files because amd64 doesn't exist and cannot conditionally copy
|
||||
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
|
||||
FROM alpine:3.18
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2024a
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
@@ -19,8 +14,9 @@ COPY ./scripts /app/scripts
|
||||
RUN chmod -R 755 /app/scripts
|
||||
|
||||
# Copy minitor in
|
||||
ARG ARCH=amd64
|
||||
COPY ./minitor-linux-${ARCH} ./minitor
|
||||
ARG TARGETOS
|
||||
ARG TARGETARCH
|
||||
COPY ./dist/minitor-${TARGETOS}-${TARGETARCH} ./minitor
|
||||
|
||||
# Drop to non-root user
|
||||
USER minitor
|
||||
|
||||
@@ -1,9 +1,5 @@
|
||||
ARG REPO=library
|
||||
FROM golang:1.12-alpine AS builder
|
||||
FROM golang:1.20 AS builder
|
||||
|
||||
RUN apk add --no-cache git=~2
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
|
||||
COPY ./go.mod ./go.sum /app/
|
||||
@@ -11,12 +7,13 @@ RUN go mod download
|
||||
|
||||
COPY ./*.go /app/
|
||||
|
||||
ARG ARCH=amd64
|
||||
ARG TARGETOS
|
||||
ARG TARGETARCH
|
||||
ARG VERSION=dev
|
||||
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
|
||||
ENV CGO_ENABLED=0 GOOS=$TARGETOS GOARCH=${TARGETARCH}
|
||||
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
|
||||
|
||||
FROM ${REPO}/alpine:3.10
|
||||
FROM alpine:3.18
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
|
||||
@@ -24,7 +21,7 @@ WORKDIR /app/
|
||||
COPY --from=builder /app/minitor .
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2024a
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
|
||||
@@ -1,36 +1,43 @@
|
||||
DOCKER_TAG ?= minitor-go-${USER}
|
||||
GIT_TAG_NAME := $(shell git tag -l --contains HEAD)
|
||||
GIT_SHA := $(shell git rev-parse HEAD)
|
||||
VERSION := $(if $(GIT_TAG_NAME),$(GIT_TAG_NAME),$(GIT_SHA))
|
||||
VERSION ?= $(shell git describe --tags --dirty)
|
||||
GOFILES = *.go go.mod go.sum
|
||||
# Multi-arch targets are generated from this
|
||||
TARGET_ALIAS = minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 minitor-darwin-amd64
|
||||
TARGETS = $(addprefix dist/,$(TARGET_ALIAS))
|
||||
#
|
||||
# Default make target will run tests
|
||||
.DEFAULT_GOAL = test
|
||||
|
||||
# Build all static Minitor binaries
|
||||
.PHONY: all
|
||||
all: minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64
|
||||
all: $(TARGETS)
|
||||
|
||||
.PHONY: default
|
||||
default: test
|
||||
# Build all static Linux Minitor binaries. Used in Docker images
|
||||
.PHONY: all-linux
|
||||
all-linux: $(filter dist/minitor-linux-%,$(TARGETS))
|
||||
|
||||
# Build minitor for the current machine
|
||||
minitor: $(GOFILES)
|
||||
@echo Version: $(VERSION)
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
||||
|
||||
.PHONY: build
|
||||
build: minitor
|
||||
|
||||
minitor:
|
||||
@echo Version: $(VERSION)
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
||||
|
||||
# Run minitor for the current machine
|
||||
.PHONY: run
|
||||
run: minitor build
|
||||
run: minitor
|
||||
./minitor -debug
|
||||
|
||||
.PHONY: run-metrics
|
||||
run-metrics: minitor build
|
||||
run-metrics: minitor
|
||||
./minitor -debug -metrics
|
||||
|
||||
# Run all tests
|
||||
.PHONY: test
|
||||
test:
|
||||
go test -coverprofile=coverage.out
|
||||
@echo
|
||||
go tool cover -func=coverage.out
|
||||
@echo
|
||||
@# Check min coverage percentage
|
||||
@go tool cover -func=coverage.out | awk -v target=80.0% \
|
||||
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
|
||||
|
||||
@@ -39,7 +46,7 @@ test:
|
||||
install-hooks:
|
||||
pre-commit install --install-hooks
|
||||
|
||||
# Checks files for encryption
|
||||
# Runs pre-commit checks on files
|
||||
.PHONY: check
|
||||
check:
|
||||
pre-commit run --all-files
|
||||
@@ -47,9 +54,8 @@ check:
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f ./minitor
|
||||
rm -f ./minitor-linux-*
|
||||
rm -f ./minitor-darwin-amd64
|
||||
rm -f ./coverage.out
|
||||
rm -fr ./dist
|
||||
|
||||
.PHONY: docker-build
|
||||
docker-build:
|
||||
@@ -57,39 +63,27 @@ docker-build:
|
||||
|
||||
.PHONY: docker-run
|
||||
docker-run: docker-build
|
||||
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG)
|
||||
docker run --rm -v $(shell pwd)/sample-config.hcl:/root/config.hcl $(DOCKER_TAG)
|
||||
|
||||
## Multi-arch targets
|
||||
|
||||
# Arch specific go build targets
|
||||
minitor-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 \
|
||||
$(TARGETS): $(GOFILES)
|
||||
mkdir -p ./dist
|
||||
GOOS=$(word 2, $(subst -, ,$(@))) GOARCH=$(word 3, $(subst -, ,$(@))) CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-darwin-amd64
|
||||
-o $@
|
||||
|
||||
minitor-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-amd64
|
||||
|
||||
minitor-linux-arm:
|
||||
GOOS=linux GOARCH=arm CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-arm
|
||||
|
||||
minitor-linux-arm64:
|
||||
GOOS=linux GOARCH=arm64 CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-arm64
|
||||
.PHONY: $(TARGET_ALIAS)
|
||||
$(TARGET_ALIAS):
|
||||
$(MAKE) $(addprefix dist/,$@)
|
||||
|
||||
# Arch specific docker build targets
|
||||
.PHONY: docker-build-arm
|
||||
docker-build-arm: minitor-linux-arm
|
||||
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
|
||||
docker-build-arm: dist/minitor-linux-arm
|
||||
docker build --platform linux/arm . -t ${DOCKER_TAG}-linux-arm
|
||||
|
||||
.PHONY: docker-build-arm
|
||||
docker-build-arm64: minitor-linux-arm64
|
||||
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64
|
||||
.PHONY: docker-build-arm64
|
||||
docker-build-arm64: dist/minitor-linux-arm64
|
||||
docker build --platform linux/arm64 . -t ${DOCKER_TAG}-linux-arm64
|
||||
|
||||
# Cross run on host architecture
|
||||
.PHONY: docker-run-arm
|
||||
|
||||
@@ -1,99 +1,184 @@
|
||||
# minitor-go
|
||||
# [minitor-go](https://git.iamthefij.com/iamthefij/minitor-go)
|
||||
|
||||
A reimplementation of [Minitor](https://git.iamthefij.com/iamthefij/minitor) in Go
|
||||
A minimal monitoring system
|
||||
|
||||
## What does it do?
|
||||
|
||||
Minitor accepts an HCL configuration file with a set of commands to run and a set of alerts to execute when those commands fail. Minitor has a narrow feature set and instead follows a principle to outsource to other command line tools when possible. Thus, it relies on other command line tools to do checks and issue alerts. To make getting started a bit easier, Minitor includes a few scripts to help with common tasks.
|
||||
|
||||
## But why?
|
||||
|
||||
I'm running a few small services and found Sensu, Consul, Nagios, etc. to all be far too complicated for my use case.
|
||||
|
||||
## So how do I use it?
|
||||
|
||||
### Running
|
||||
|
||||
Install and execute with:
|
||||
|
||||
```bash
|
||||
go install github.com/iamthefij/minitor-go@latest
|
||||
minitor
|
||||
```
|
||||
|
||||
If locally developing you can use:
|
||||
|
||||
```bash
|
||||
make run
|
||||
```
|
||||
|
||||
It will read the contents of `sample-config.hcl` and begin its loop. You could also run it directly and provide a new config file via the `-config` argument.
|
||||
|
||||
|
||||
#### Docker
|
||||
|
||||
You can pull this repository directly from Docker:
|
||||
|
||||
```bash
|
||||
docker pull iamthefij/minitor-go:latest
|
||||
```
|
||||
|
||||
The Docker image uses a default `config.hcl` copied from `sample-config.hcl`. This won't really do anything for you, so when you run the Docker image, you should supply your own `config.hcl` file:
|
||||
|
||||
```bash
|
||||
docker run -v $PWD/sample-config.hcl:/app/config.hcl iamthefij/minitor-go:latest
|
||||
```
|
||||
|
||||
Images are provided for `amd64`, `arm`, and `arm64` architectures.
|
||||
|
||||
You can configure the timezone for the container by passing a `TZ` env variable. Eg. `TZ=America/Los_Angeles`.
|
||||
|
||||
## Configuring
|
||||
|
||||
In this repo, you can explore the `sample-config.hcl` file for an example, but the general structure is as follows. It should be noted that environment variable interpolation happens on load of the HCL file.
|
||||
|
||||
The global configurations are:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|
||||
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|
||||
|`default_alert_every`|A default value used as an `alert_every` value for a monitor if not specified.|
|
||||
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|
||||
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|
||||
|`monitor`|block listing monitors. Detailed description below|
|
||||
|`alert`|List of all alerts. Detailed description below|
|
||||
|
||||
### Monitors
|
||||
|
||||
Represent your monitors as blocks with a label indicating the name of the monitor.
|
||||
|
||||
```hcl
|
||||
monitor "example" {
|
||||
command = ["echo", "Hello, World!"]
|
||||
alert_down = ["log"]
|
||||
alert_up = ["log"]
|
||||
check_interval = "1m"
|
||||
alert_after = 1
|
||||
alert_every = 0
|
||||
}
|
||||
```
|
||||
|
||||
Each monitor allows the following configuration:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`name`|Name of the monitor running. This will show up in messages and logs.|
|
||||
|`command`|A list of strings representing a command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `shell_command`|
|
||||
|`shell_command`|A single string that represents a shell command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `command`|
|
||||
|`alert_down`|A list of Alerts to be triggered when the monitor is in a "down" state|
|
||||
|`alert_up`|A list of Alerts to be triggered when the monitor moves to an "up" state|
|
||||
|`check_interval`|The interval at which this monitor should be checked. This must be greater than the global `check_interval` value|
|
||||
|`alert_after`|Allows specifying the number of failed checks before an alert should be triggered. A value of 1 will start sending alerts after the first failure.|
|
||||
|`alert_every`|Allows specifying how often an alert should be retriggered. There are a few magic numbers here. Defaults to `-1` for an exponential backoff. Setting to `0` disables re-alerting. Positive values will allow retriggering after the specified number of checks|
|
||||
|
||||
### Alerts
|
||||
|
||||
Represent your alerts as blocks with a label indicating the name of the alert. The name will be used in your monitor setup in `alert_down` and `alert_up`.
|
||||
|
||||
```hcl
|
||||
monitor "example" {
|
||||
command = ["false"]
|
||||
alert_down = ["log"]
|
||||
}
|
||||
|
||||
alert "log" {
|
||||
shell_command = "echo '{{.MonitorName}} is down!'"
|
||||
}
|
||||
```
|
||||
|
||||
Each alert allows the following configuration:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`command`|Specifies the command that should be executed in exec form. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `shell_command`|
|
||||
|`shell_command`|Specifies a shell command as a single string. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `command`|
|
||||
|
||||
Also, when alerts are executed, they will be passed through Go's format function with arguments for some attributes of the Monitor. The following monitor specific variables can be referenced using Go formatting syntax:
|
||||
|
||||
|token|value|
|
||||
|---|---|
|
||||
|`{{.AlertCount}}`|Number of times this monitor has alerted|
|
||||
|`{{.FailureCount}}`|The total number of sequential failed checks for this monitor|
|
||||
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|
||||
|`{{.LastSuccess}}`|The datetime of the last successful check as a go Time struct|
|
||||
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|
||||
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
|
||||
|
||||
To provide flexible formatting, the following non-standard functions are available in templates:
|
||||
|
||||
|func|description|
|
||||
|---|---|
|
||||
|`ANSIC <Time>`|Formats provided time in ANSIC format|
|
||||
|`UnixDate <Time>`|Formats provided time in UnixDate format|
|
||||
|`RubyDate <Time>`|Formats provided time in RubyDate format|
|
||||
|`RFC822Z <Time>`|Formats provided time in RFC822Z format|
|
||||
|`RFC850 <Time>`|Formats provided time in RFC850 format|
|
||||
|`RFC1123 <Time>`|Formats provided time in RFC1123 format|
|
||||
|`RFC1123Z <Time>`|Formats provided time in RFC1123Z format|
|
||||
|`RFC3339 <Time>`|Formats provided time in RFC3339 format|
|
||||
|`RFC3339Nano <Time>`|Formats provided time in RFC3339Nano format|
|
||||
|`FormatTime <Time> <string template>`|Formats provided time according to provided template|
|
||||
|`InTZ <Time> <string timezone name>`|Converts provided time to parsed timezone from the provided name|
|
||||
|
||||
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
|
||||
|
||||
#### Running alerts on startup
|
||||
|
||||
It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma separated list of strings and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This can be used to allow you time to correct when initially launching, and to allow schedulers to more easily detect a failed deployment of Minitor.
|
||||
|
||||
Eg.
|
||||
|
||||
```bash
|
||||
minitor -startup-alerts=log_down,log_up -config ./config.hcl
|
||||
```
|
||||
|
||||
### Metrics
|
||||
|
||||
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
|
||||
|
||||
It is also possible to use the metrics endpoint for monitoring Minitor itself! This allows setting up multiple instances of Minitor on different servers and having them monitor each other so that you can detect a Minitor outage.
|
||||
|
||||
To run minitor with metrics, use the `-metrics` flag. The metrics will be served on port `8080` by default, though it can be overridden using `-metrics-port`. They will be accessible on the path `/metrics`. Eg. `localhost:8080/metrics`.
|
||||
|
||||
```bash
|
||||
minitor -metrics
|
||||
# or
|
||||
minitor -metrics -metrics-port 3000
|
||||
```
|
||||
|
||||
## Contributing
|
||||
|
||||
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
||||
|
||||
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
|
||||
|
||||
Github Mirror: https://github.com/IamTheFij/minitor.git
|
||||
|
||||
## Original Minitor
|
||||
|
||||
This is a reimplementation of [Minitor](https://git.iamthefij.com/iamthefij/minitor) in Go
|
||||
|
||||
Minitor is already a minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a large footprint. Thus Go feels like a better fit for the project, longer term.
|
||||
|
||||
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
|
||||
|
||||
## Differences from Python version
|
||||
|
||||
There are a few key differences between the Python version and the v0.x Go version.
|
||||
|
||||
First, configuration keys cannot have multiple types in Go, so a different key must be used when specifying a Shell command as a string rather than a list of args. Instead of `command`, you must use `command_shell`. Eg:
|
||||
|
||||
minitor-py:
|
||||
```yaml
|
||||
monitors:
|
||||
- name: Exec command
|
||||
command: ['echo', 'test']
|
||||
- name: Shell command
|
||||
command: echo 'test'
|
||||
```
|
||||
|
||||
minitor-go:
|
||||
```yaml
|
||||
monitors:
|
||||
- name: Exec command
|
||||
command: ['echo', 'test']
|
||||
- name: Shell command
|
||||
command_shell: echo 'test'
|
||||
```
|
||||
|
||||
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct, `AlertNotice` defined in `alert.go` and the built in Go templating format. Eg.
|
||||
|
||||
minitor-py:
|
||||
```yaml
|
||||
alerts:
|
||||
log_command:
|
||||
command: ['echo', '{monitor_name}']
|
||||
log_shell:
|
||||
command_shell: 'echo {monitor_name}'
|
||||
```
|
||||
|
||||
minitor-go:
|
||||
```yaml
|
||||
alerts:
|
||||
log_command:
|
||||
command: ['echo', '{{.MonitorName}}']
|
||||
log_shell:
|
||||
command_shell: 'echo {{.MonitorName}}'
|
||||
```
|
||||
|
||||
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used to separate commands, and line continuations should not be used.
|
||||
|
||||
minitor-py:
|
||||
```yaml
|
||||
alerts:
|
||||
log_shell:
|
||||
command_shell: >
|
||||
echo "line 1"
|
||||
echo "line 2"
|
||||
echo "continued" \
|
||||
"line"
|
||||
```
|
||||
|
||||
minitor-go:
|
||||
```yaml
|
||||
alerts:
|
||||
log_shell:
|
||||
command_shell: >
|
||||
echo "line 1";
|
||||
echo "line 2";
|
||||
echo "continued"
|
||||
"line"
|
||||
```
|
||||
|
||||
## To do
|
||||
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
|
||||
|
||||
Parity:
|
||||
|
||||
- [x] Run monitor commands
|
||||
- [x] Run monitor commands in a shell
|
||||
- [x] Run alert commands
|
||||
- [x] Run alert commands in a shell
|
||||
- [x] Allow templating of alert commands
|
||||
- [x] Implement Prometheus client to export metrics
|
||||
- [x] Test coverage
|
||||
- [x] Integration testing (manual or otherwise)
|
||||
|
||||
Improvement (potentially breaking):
|
||||
|
||||
- [ ] Implement leveled logging (maybe glog or logrus)
|
||||
- [ ] Consider switching from YAML to TOML
|
||||
- [ ] Consider value of templating vs injecting values into Env variables
|
||||
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging
|
||||
- [ ] Async checking
|
||||
- [ ] Use durations rather than seconds checked in event loop
|
||||
- [ ] Revisit metrics and see if they all make sense
|
||||
|
||||
@@ -2,89 +2,142 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
var (
|
||||
errNoTemplate = errors.New("no template")
|
||||
|
||||
// ErrAlertFailed indicates that an alert failed to send
|
||||
ErrAlertFailed = errors.New("alert failed")
|
||||
)
|
||||
|
||||
// Alert is a config driven mechanism for sending a notice
|
||||
type Alert struct {
|
||||
Name string
|
||||
Command []string
|
||||
CommandShell string `yaml:"command_shell"`
|
||||
Name string `hcl:"name,label"`
|
||||
Command []string `hcl:"command,optional"`
|
||||
ShellCommand string `hcl:"shell_command,optional"`
|
||||
commandTemplate []*template.Template
|
||||
commandShellTemplate *template.Template
|
||||
}
|
||||
|
||||
// AlertNotice captures the context for an alert to be sent
|
||||
type AlertNotice struct {
|
||||
MonitorName string
|
||||
AlertCount int16
|
||||
FailureCount int16
|
||||
LastCheckOutput string
|
||||
LastSuccess time.Time
|
||||
AlertCount int
|
||||
FailureCount int
|
||||
IsUp bool
|
||||
LastSuccess time.Time
|
||||
MonitorName string
|
||||
LastCheckOutput string
|
||||
}
|
||||
|
||||
// IsValid returns a boolean indicating if the Alert has been correctly
|
||||
// configured
|
||||
func (alert Alert) IsValid() bool {
|
||||
atLeastOneCommand := (alert.CommandShell != "" || alert.Command != nil)
|
||||
atMostOneCommand := (alert.CommandShell == "" || alert.Command == nil)
|
||||
return atLeastOneCommand && atMostOneCommand
|
||||
hasAtLeastOneCommand := alert.Command != nil || alert.ShellCommand != ""
|
||||
hasAtMostOneCommand := alert.Command == nil || alert.ShellCommand == ""
|
||||
|
||||
return hasAtLeastOneCommand && hasAtMostOneCommand
|
||||
}
|
||||
|
||||
// BuildTemplates compiles command templates for the Alert
|
||||
func (alert *Alert) BuildTemplates() error {
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Building template for alert %s", alert.Name)
|
||||
slog.Debugf("Building template for alert %s", alert.Name)
|
||||
|
||||
// Time format func factory
|
||||
tff := func(formatString string) func(time.Time) string {
|
||||
return func(t time.Time) string {
|
||||
return t.Format(formatString)
|
||||
}
|
||||
if alert.commandTemplate == nil && alert.Command != nil {
|
||||
}
|
||||
|
||||
// Create some functions for formatting datetimes in popular formats
|
||||
timeFormatFuncs := template.FuncMap{
|
||||
"ANSIC": tff(time.ANSIC),
|
||||
"UnixDate": tff(time.UnixDate),
|
||||
"RubyDate": tff(time.RubyDate),
|
||||
"RFC822Z": tff(time.RFC822Z),
|
||||
"RFC850": tff(time.RFC850),
|
||||
"RFC1123": tff(time.RFC1123),
|
||||
"RFC1123Z": tff(time.RFC1123Z),
|
||||
"RFC3339": tff(time.RFC3339),
|
||||
"RFC3339Nano": tff(time.RFC3339Nano),
|
||||
"FormatTime": func(t time.Time, timeFormat string) string {
|
||||
return t.Format(timeFormat)
|
||||
},
|
||||
"InTZ": func(t time.Time, tzName string) (time.Time, error) {
|
||||
tz, err := time.LoadLocation(tzName)
|
||||
if err != nil {
|
||||
return t, fmt.Errorf("failed to convert time to specified tz: %w", err)
|
||||
}
|
||||
|
||||
return t.In(tz), nil
|
||||
},
|
||||
}
|
||||
|
||||
switch {
|
||||
case alert.commandTemplate == nil && alert.Command != nil:
|
||||
alert.commandTemplate = []*template.Template{}
|
||||
for i, cmdPart := range alert.Command {
|
||||
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
||||
template.New(alert.Name+string(i)).Parse(cmdPart),
|
||||
template.New(alert.Name+fmt.Sprint(i)).Funcs(timeFormatFuncs).Parse(cmdPart),
|
||||
))
|
||||
}
|
||||
} else if alert.commandShellTemplate == nil && alert.CommandShell != "" {
|
||||
case alert.commandShellTemplate == nil && alert.ShellCommand != "":
|
||||
shellCmd := alert.ShellCommand
|
||||
|
||||
alert.commandShellTemplate = template.Must(
|
||||
template.New(alert.Name).Parse(alert.CommandShell),
|
||||
template.New(alert.Name).Funcs(timeFormatFuncs).Parse(shellCmd),
|
||||
)
|
||||
} else {
|
||||
return fmt.Errorf("No template provided for alert %s", alert.Name)
|
||||
default:
|
||||
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Send will send an alert notice by executing the command template
|
||||
func (alert Alert) Send(notice AlertNotice) (output_str string, err error) {
|
||||
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
|
||||
var cmd *exec.Cmd
|
||||
if alert.commandTemplate != nil {
|
||||
|
||||
switch {
|
||||
case alert.commandTemplate != nil:
|
||||
command := []string{}
|
||||
|
||||
for _, cmdTmp := range alert.commandTemplate {
|
||||
var commandBuffer bytes.Buffer
|
||||
|
||||
err = cmdTmp.Execute(&commandBuffer, notice)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
command = append(command, commandBuffer.String())
|
||||
}
|
||||
|
||||
cmd = exec.Command(command[0], command[1:]...)
|
||||
} else if alert.commandShellTemplate != nil {
|
||||
case alert.commandShellTemplate != nil:
|
||||
var commandBuffer bytes.Buffer
|
||||
|
||||
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
shellCommand := commandBuffer.String()
|
||||
|
||||
cmd = ShellCommand(shellCommand)
|
||||
} else {
|
||||
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
|
||||
default:
|
||||
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -95,10 +148,17 @@ func (alert Alert) Send(notice AlertNotice) (output_str string, err error) {
|
||||
|
||||
var output []byte
|
||||
output, err = cmd.CombinedOutput()
|
||||
output_str = string(output)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, output_str)
|
||||
outputStr = string(output)
|
||||
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||
|
||||
if err != nil {
|
||||
err = fmt.Errorf(
|
||||
"Alert %s failed to send. Returned %w: %w",
|
||||
alert.Name,
|
||||
err,
|
||||
ErrAlertFailed,
|
||||
)
|
||||
}
|
||||
|
||||
return output_str, err
|
||||
return outputStr, err
|
||||
}
|
||||
|
||||
+52
-40
@@ -1,69 +1,68 @@
|
||||
package main
|
||||
package main_test
|
||||
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
|
||||
m "git.iamthefij.com/iamthefij/minitor-go"
|
||||
)
|
||||
|
||||
func TestAlertIsValid(t *testing.T) {
|
||||
cases := []struct {
|
||||
alert Alert
|
||||
alert m.Alert
|
||||
expected bool
|
||||
name string
|
||||
}{
|
||||
{Alert{Command: []string{"echo", "test"}}, true, "Command only"},
|
||||
{Alert{CommandShell: "echo test"}, true, "CommandShell only"},
|
||||
{Alert{}, false, "No commands"},
|
||||
{
|
||||
Alert{Command: []string{"echo", "test"}, CommandShell: "echo test"},
|
||||
false,
|
||||
"Both commands",
|
||||
},
|
||||
{m.Alert{Command: []string{"echo", "test"}}, true, "Command only"},
|
||||
{m.Alert{ShellCommand: "echo test"}, true, "CommandShell only"},
|
||||
{m.Alert{}, false, "No commands"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
actual := c.alert.IsValid()
|
||||
if actual != c.expected {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
t.Errorf("expected=%t actual=%t", c.expected, actual)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertSend(t *testing.T) {
|
||||
cases := []struct {
|
||||
alert Alert
|
||||
notice AlertNotice
|
||||
alert m.Alert
|
||||
notice m.AlertNotice
|
||||
expectedOutput string
|
||||
expectErr bool
|
||||
name string
|
||||
}{
|
||||
{
|
||||
Alert{Command: []string{"echo", "{{.MonitorName}}"}},
|
||||
AlertNotice{MonitorName: "test"},
|
||||
m.Alert{Command: []string{"echo", "{{.MonitorName}}"}},
|
||||
m.AlertNotice{MonitorName: "test"},
|
||||
"test\n",
|
||||
false,
|
||||
"Command with template",
|
||||
},
|
||||
{
|
||||
Alert{CommandShell: "echo {{.MonitorName}}"},
|
||||
AlertNotice{MonitorName: "test"},
|
||||
m.Alert{ShellCommand: "echo {{.MonitorName}}"},
|
||||
m.AlertNotice{MonitorName: "test"},
|
||||
"test\n",
|
||||
false,
|
||||
"Command shell with template",
|
||||
},
|
||||
{
|
||||
Alert{Command: []string{"echo", "{{.Bad}}"}},
|
||||
AlertNotice{MonitorName: "test"},
|
||||
m.Alert{Command: []string{"echo", "{{.Bad}}"}},
|
||||
m.AlertNotice{MonitorName: "test"},
|
||||
"",
|
||||
true,
|
||||
"Command with bad template",
|
||||
},
|
||||
{
|
||||
Alert{CommandShell: "echo {{.Bad}}"},
|
||||
AlertNotice{MonitorName: "test"},
|
||||
m.Alert{ShellCommand: "echo {{.Bad}}"},
|
||||
m.AlertNotice{MonitorName: "test"},
|
||||
"",
|
||||
true,
|
||||
"Command shell with bad template",
|
||||
@@ -71,51 +70,64 @@ func TestAlertSend(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c.alert.BuildTemplates()
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
err := c.alert.BuildTemplates()
|
||||
if err != nil {
|
||||
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
|
||||
}
|
||||
|
||||
output, err := c.alert.Send(c.notice)
|
||||
hasErr := (err != nil)
|
||||
|
||||
if output != c.expectedOutput {
|
||||
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestAlertSendNoTemplates(t *testing.T) {
|
||||
alert := Alert{}
|
||||
notice := AlertNotice{}
|
||||
alert := m.Alert{}
|
||||
notice := m.AlertNotice{}
|
||||
|
||||
output, err := alert.Send(notice)
|
||||
if err == nil {
|
||||
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
|
||||
}
|
||||
log.Println("-----")
|
||||
}
|
||||
|
||||
func TestAlertBuildTemplate(t *testing.T) {
|
||||
cases := []struct {
|
||||
alert Alert
|
||||
alert m.Alert
|
||||
expectErr bool
|
||||
name string
|
||||
}{
|
||||
{Alert{Command: []string{"echo", "test"}}, false, "Command only"},
|
||||
{Alert{CommandShell: "echo test"}, false, "CommandShell only"},
|
||||
{Alert{}, true, "No commands"},
|
||||
{m.Alert{Command: []string{"echo", "test"}}, false, "Command only"},
|
||||
{m.Alert{ShellCommand: "echo test"}, false, "CommandShell only"},
|
||||
{m.Alert{}, true, "No commands"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
err := c.alert.BuildTemplates()
|
||||
hasErr := (err != nil)
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,66 +2,50 @@ package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"gopkg.in/yaml.v2"
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
/*
|
||||
* "github.com/hashicorp/hcl/v2"
|
||||
* "github.com/hashicorp/hcl/v2/gohcl"
|
||||
*/
|
||||
"github.com/hashicorp/hcl/v2/hclsimple"
|
||||
)
|
||||
|
||||
var errInvalidConfig = errors.New("Invalid configuration")
|
||||
|
||||
// Config type contains all provided user configuration
|
||||
type Config struct {
|
||||
CheckInterval int64 `yaml:"check_interval"`
|
||||
Monitors []*Monitor
|
||||
Alerts map[string]*Alert
|
||||
CheckIntervalStr string `hcl:"check_interval"`
|
||||
CheckInterval time.Duration
|
||||
|
||||
DefaultAlertAfter *int `hcl:"default_alert_after,optional"`
|
||||
DefaultAlertEvery *int `hcl:"default_alert_every,optional"`
|
||||
DefaultAlertDown []string `hcl:"default_alert_down,optional"`
|
||||
DefaultAlertUp []string `hcl:"default_alert_up,optional"`
|
||||
Monitors []*Monitor `hcl:"monitor,block"`
|
||||
Alerts []*Alert `hcl:"alert,block"`
|
||||
|
||||
alertLookup map[string]*Alert
|
||||
}
|
||||
|
||||
// IsValid checks config validity and returns true if valid
|
||||
func (config Config) IsValid() (isValid bool) {
|
||||
isValid = true
|
||||
|
||||
// Validate monitors
|
||||
if config.Monitors == nil || len(config.Monitors) == 0 {
|
||||
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
|
||||
isValid = false
|
||||
}
|
||||
for _, monitor := range config.Monitors {
|
||||
if !monitor.IsValid() {
|
||||
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
|
||||
isValid = false
|
||||
}
|
||||
// Check that all Monitor alerts actually exist
|
||||
for _, isUp := range []bool{true, false} {
|
||||
for _, alertName := range monitor.GetAlertNames(isUp) {
|
||||
if _, ok := config.Alerts[alertName]; !ok {
|
||||
log.Printf(
|
||||
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
|
||||
monitor.Name, alertName,
|
||||
)
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
func (c Config) GetAlert(name string) (*Alert, bool) {
|
||||
if c.alertLookup == nil {
|
||||
c.alertLookup = map[string]*Alert{}
|
||||
for _, alert := range c.Alerts {
|
||||
c.alertLookup[alert.Name] = alert
|
||||
}
|
||||
}
|
||||
|
||||
// Validate alerts
|
||||
if config.Alerts == nil || len(config.Alerts) == 0 {
|
||||
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
|
||||
isValid = false
|
||||
}
|
||||
for _, alert := range config.Alerts {
|
||||
if !alert.IsValid() {
|
||||
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
v, ok := c.alertLookup[name]
|
||||
|
||||
return
|
||||
return v, ok
|
||||
}
|
||||
|
||||
// Init performs extra initialization on top of loading the config from file
|
||||
func (config *Config) Init() (err error) {
|
||||
for name, alert := range config.Alerts {
|
||||
alert.Name = name
|
||||
// BuildAllTemplates builds all alert templates
|
||||
func (c *Config) BuildAllTemplates() (err error) {
|
||||
for _, alert := range c.Alerts {
|
||||
if err = alert.BuildTemplates(); err != nil {
|
||||
return
|
||||
}
|
||||
@@ -70,29 +54,119 @@ func (config *Config) Init() (err error) {
|
||||
return
|
||||
}
|
||||
|
||||
// LoadConfig will read config from the given path and parse it
|
||||
func LoadConfig(filePath string) (config Config, err error) {
|
||||
data, err := ioutil.ReadFile(filePath)
|
||||
// IsValid checks config validity and returns true if valid
|
||||
func (config Config) IsValid() (isValid bool) {
|
||||
isValid = true
|
||||
|
||||
// Validate alerts
|
||||
if len(config.Alerts) == 0 {
|
||||
// This should never happen because there is a default alert named 'log' for now
|
||||
slog.Errorf("Invalid alert configuration: Must provide at least one alert")
|
||||
|
||||
isValid = false
|
||||
}
|
||||
|
||||
for _, alert := range config.Alerts {
|
||||
if !alert.IsValid() {
|
||||
slog.Errorf("Invalid alert configuration: %+v", alert.Name)
|
||||
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
|
||||
// Validate monitors
|
||||
if len(config.Monitors) == 0 {
|
||||
slog.Errorf("Invalid monitor configuration: Must provide at least one monitor")
|
||||
|
||||
isValid = false
|
||||
}
|
||||
|
||||
for _, monitor := range config.Monitors {
|
||||
if !monitor.IsValid() {
|
||||
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
||||
|
||||
isValid = false
|
||||
}
|
||||
// Check that all Monitor alerts actually exist
|
||||
for _, isUp := range []bool{true, false} {
|
||||
for _, alertName := range monitor.GetAlertNames(isUp) {
|
||||
if _, ok := config.GetAlert(alertName); !ok {
|
||||
slog.Errorf(
|
||||
"Invalid monitor configuration: %s. Unknown alert %s",
|
||||
monitor.Name, alertName,
|
||||
)
|
||||
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return isValid
|
||||
}
|
||||
|
||||
// Init performs extra initialization on top of loading the config from file
|
||||
func (config *Config) Init() (err error) {
|
||||
config.CheckInterval, err = time.ParseDuration(config.CheckIntervalStr)
|
||||
if err != nil {
|
||||
return
|
||||
return fmt.Errorf("failed to parse top level check_interval duration: %w", err)
|
||||
}
|
||||
|
||||
err = yaml.Unmarshal(data, &config)
|
||||
for _, monitor := range config.Monitors {
|
||||
// TODO: Move this to a Monitor.Init() method
|
||||
|
||||
// Parse the check_interval string into a time.Duration
|
||||
if monitor.CheckIntervalStr != nil {
|
||||
monitor.CheckInterval, err = time.ParseDuration(*monitor.CheckIntervalStr)
|
||||
if err != nil {
|
||||
return
|
||||
return fmt.Errorf("failed to parse check_interval duration for monitor %s: %w", monitor.Name, err)
|
||||
}
|
||||
}
|
||||
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Config values:\n%v\n", config)
|
||||
// Set default values for monitor alerts
|
||||
if monitor.AlertAfter == 0 && config.DefaultAlertAfter != nil {
|
||||
monitor.AlertAfter = *config.DefaultAlertAfter
|
||||
} else if monitor.AlertAfter == 0 {
|
||||
monitor.AlertAfter = 1
|
||||
}
|
||||
|
||||
if !config.IsValid() {
|
||||
err = errors.New("Invalid configuration")
|
||||
return
|
||||
if monitor.AlertEvery == nil {
|
||||
monitor.AlertEvery = config.DefaultAlertEvery
|
||||
}
|
||||
|
||||
// Finish initializing configuration
|
||||
err = config.Init()
|
||||
if monitor.AlertDown == nil {
|
||||
monitor.AlertDown = config.DefaultAlertDown
|
||||
}
|
||||
|
||||
if monitor.AlertUp == nil {
|
||||
monitor.AlertUp = config.DefaultAlertUp
|
||||
}
|
||||
}
|
||||
|
||||
err = config.BuildAllTemplates()
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// LoadConfig will read config from the given path and parse it
|
||||
func LoadConfig(filePath string) (config Config, err error) {
|
||||
err = hclsimple.DecodeFile(filePath, nil, &config)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
slog.Debugf("Config values:\n%v\n", config)
|
||||
|
||||
// Finish initializing configuration
|
||||
if err = config.Init(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !config.IsValid() {
|
||||
err = errInvalidConfig
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
return config, err
|
||||
}
|
||||
|
||||
+55
-48
@@ -1,8 +1,9 @@
|
||||
package main
|
||||
package main_test
|
||||
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
|
||||
m "git.iamthefij.com/iamthefij/minitor-go"
|
||||
)
|
||||
|
||||
func TestLoadConfig(t *testing.T) {
|
||||
@@ -11,86 +12,92 @@ func TestLoadConfig(t *testing.T) {
|
||||
expectErr bool
|
||||
name string
|
||||
}{
|
||||
{"./test/valid-config.yml", false, "Valid config file"},
|
||||
{"./test/does-not-exist", true, "Invalid config path"},
|
||||
{"./test/invalid-config-type.yml", true, "Invalid config type for key"},
|
||||
{"./test/invalid-config-missing-alerts.yml", true, "Invalid config missing alerts"},
|
||||
{"./test/invalid-config-unknown-alert.yml", true, "Invalid config unknown alert"},
|
||||
{"./test/invalid-config-missing-alerts.hcl", true, "Invalid config missing alerts"},
|
||||
{"./test/invalid-config-type.hcl", true, "Invalid config type for key"},
|
||||
{"./test/invalid-config-unknown-alert.hcl", true, "Invalid config unknown alert"},
|
||||
{"./test/valid-config-default-values.hcl", false, "Valid config file with default values"},
|
||||
{"./test/valid-config.hcl", false, "Valid config file"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
_, err := LoadConfig(c.configPath)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
_, err := m.LoadConfig(c.configPath)
|
||||
hasErr := (err != nil)
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMultiLineConfig is a more complicated test stepping through the parsing
|
||||
// and execution of multi-line strings presented in YAML
|
||||
func TestMultiLineConfig(t *testing.T) {
|
||||
log.Println("Testing multi-line string config")
|
||||
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
||||
t.Parallel()
|
||||
|
||||
config, err := m.LoadConfig("./test/valid-verify-multi-line.hcl")
|
||||
if err != nil {
|
||||
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(parse > string)")
|
||||
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
||||
actual := config.Monitors[0].CommandShell
|
||||
t.Run("Test Monitor with Indented Multi-Line String", func(t *testing.T) {
|
||||
// Verify indented heredoc is as expected
|
||||
expected := "echo 'Some string with stuff'\necho \"<angle brackets>\"\nexit 1\n"
|
||||
actual := config.Monitors[0].ShellCommand
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(>) failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
t.Logf("string actual =`%v`", actual)
|
||||
t.Logf("bytes expected=%v", []byte(expected))
|
||||
t.Logf("bytes actual =%v", []byte(actual))
|
||||
t.Error("Heredoc mismatch")
|
||||
t.Errorf("string expected=`%v`", expected)
|
||||
t.Errorf("string actual =`%v`", actual)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(execute > string)")
|
||||
// Run the monitor and verify the output
|
||||
_, notice := config.Monitors[0].Check()
|
||||
if notice == nil {
|
||||
t.Fatalf("Did not receive an alert notice")
|
||||
t.Fatal("Did not receive an alert notice and should have")
|
||||
}
|
||||
|
||||
// Verify the output of the monitor is as expected
|
||||
expected = "Some string with stuff\n<angle brackets>\n"
|
||||
actual = notice.LastCheckOutput
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
t.Logf("string actual =`%v`", actual)
|
||||
t.Logf("bytes expected=%v", []byte(expected))
|
||||
t.Logf("bytes actual =%v", []byte(actual))
|
||||
t.Error("Output mismatch")
|
||||
t.Errorf("string expected=`%v`", expected)
|
||||
t.Errorf("string actual =`%v`", actual)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("Test Alert with Multi-Line String", func(t *testing.T) {
|
||||
alert, ok := config.GetAlert("log_shell")
|
||||
if !ok {
|
||||
t.Fatal("Could not find expected alert 'log_shell'")
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(parse | string)")
|
||||
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
||||
actual = config.Alerts["log_shell"].CommandShell
|
||||
expected := " echo 'Some string with stuff'\n echo '<angle brackets>'\n"
|
||||
actual := alert.ShellCommand
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(|) failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
t.Logf("string actual =`%v`", actual)
|
||||
t.Logf("bytes expected=%v", []byte(expected))
|
||||
t.Logf("bytes actual =%v", []byte(actual))
|
||||
t.Error("Heredoc mismatch")
|
||||
t.Errorf("string expected=`%v`", expected)
|
||||
t.Errorf("string actual =`%v`", actual)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(execute | string)")
|
||||
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
||||
actual, err = alert.Send(m.AlertNotice{})
|
||||
if err != nil {
|
||||
t.Errorf("Execution of alert failed")
|
||||
t.Fatal("Execution of alert failed")
|
||||
}
|
||||
|
||||
expected = "Some string with stuff\n<angle brackets>\n"
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
t.Logf("string actual =`%v`", actual)
|
||||
t.Logf("bytes expected=%v", []byte(expected))
|
||||
t.Logf("bytes actual =%v", []byte(actual))
|
||||
t.Error("Output mismatch")
|
||||
t.Errorf("string expected=`%v`", expected)
|
||||
t.Errorf("string actual =`%v`", actual)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
@@ -1,8 +1,25 @@
|
||||
module git.iamthefij.com/iamthefij/minitor-go
|
||||
|
||||
go 1.12
|
||||
go 1.20
|
||||
|
||||
require (
|
||||
github.com/prometheus/client_golang v1.2.1
|
||||
gopkg.in/yaml.v2 v2.2.4
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0
|
||||
github.com/hashicorp/hcl/v2 v2.11.1
|
||||
github.com/prometheus/client_golang v1.19.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/agext/levenshtein v1.2.1 // indirect
|
||||
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.2.0 // indirect
|
||||
github.com/google/go-cmp v0.6.0 // indirect
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect
|
||||
github.com/prometheus/client_model v0.5.0 // indirect
|
||||
github.com/prometheus/common v0.48.0 // indirect
|
||||
github.com/prometheus/procfs v0.12.0 // indirect
|
||||
github.com/zclconf/go-cty v1.8.0 // indirect
|
||||
golang.org/x/sys v0.16.0 // indirect
|
||||
golang.org/x/text v0.14.0 // indirect
|
||||
google.golang.org/protobuf v1.32.0 // indirect
|
||||
)
|
||||
|
||||
@@ -1,78 +1,77 @@
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q=
|
||||
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8=
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
|
||||
github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8=
|
||||
github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
|
||||
github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
|
||||
github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
|
||||
github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw=
|
||||
github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA=
|
||||
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM=
|
||||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/cespare/xxhash/v2 v2.2.0 h1:DC2CZ1Ep5Y4k3ZQ899DldepgrayRUGE6BBZ/cd9Cj44=
|
||||
github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
|
||||
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE=
|
||||
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
|
||||
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
|
||||
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
|
||||
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
|
||||
github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
|
||||
github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs=
|
||||
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
|
||||
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg=
|
||||
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
|
||||
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
|
||||
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w=
|
||||
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
|
||||
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
|
||||
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0=
|
||||
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U=
|
||||
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
|
||||
github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
|
||||
github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI=
|
||||
github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/hashicorp/hcl/v2 v2.11.1 h1:yTyWcXcm9XB0TEkyU/JCRU6rYy4K+mgLtzn2wlrJbcc=
|
||||
github.com/hashicorp/hcl/v2 v2.11.1/go.mod h1:FwWsfWEjyV/CMj8s/gqAuiviY72rJ1/oayI9WftqcKg=
|
||||
github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
|
||||
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
|
||||
github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
|
||||
github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
|
||||
github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
|
||||
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348 h1:MtvEpTB6LX3vkb4ax0b5D2DHbNAUsen0Gx5wZoq3lV4=
|
||||
github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
|
||||
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw=
|
||||
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo=
|
||||
github.com/prometheus/client_golang v1.2.1 h1:JnMpQc6ppsNgw9QPAGF6Dod479itz7lvlsMzzNayLOI=
|
||||
github.com/prometheus/client_golang v1.2.1/go.mod h1:XMU6Z2MjaRKVu/dC1qupJI9SiNkDYzz3xecMgSW/F+U=
|
||||
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
|
||||
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM=
|
||||
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
|
||||
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
|
||||
github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLyCY=
|
||||
github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA=
|
||||
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
|
||||
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
|
||||
github.com/prometheus/procfs v0.0.5 h1:3+auTFlqw+ZaQYJARz6ArODtkaIwtvBTx3N2NehQlL8=
|
||||
github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
|
||||
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
|
||||
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
|
||||
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
|
||||
github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE=
|
||||
github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
|
||||
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
|
||||
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
|
||||
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
|
||||
github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
|
||||
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
|
||||
github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
|
||||
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
|
||||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
|
||||
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
|
||||
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
|
||||
github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
|
||||
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
|
||||
github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8=
|
||||
github.com/zclconf/go-cty v1.8.0 h1:s4AvqaeQzJIu3ndv4gVIhplVD0krU+bgrcLSVUnaWuA=
|
||||
github.com/zclconf/go-cty v1.8.0/go.mod h1:vVKLxnk3puL4qRAv72AO+W99LUD4da90g3uUAzyuvAk=
|
||||
github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b/go.mod h1:ZRKQfBXbGkpdV6QMzT3rU1kSTAnfu1dO8dPKjYprgj8=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
|
||||
golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
|
||||
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I=
|
||||
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
|
||||
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
|
||||
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.14.0 h1:ScX5w1eTa3QqT8oi6+ziP7dTV1S2+ALU0bI+0zXKWiQ=
|
||||
golang.org/x/text v0.14.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
|
||||
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
|
||||
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
|
||||
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
|
||||
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
|
||||
@@ -1,16 +1,16 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
var (
|
||||
// LogDebug will control whether debug messages should be logged
|
||||
LogDebug = false
|
||||
|
||||
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
||||
ExportMetrics = false
|
||||
// MetricsPort is the port to expose metrics on
|
||||
@@ -20,58 +20,68 @@ var (
|
||||
|
||||
// version of minitor being run
|
||||
version = "dev"
|
||||
|
||||
errUnknownAlert = errors.New("unknown alert")
|
||||
)
|
||||
|
||||
func checkMonitors(config *Config) error {
|
||||
for _, monitor := range config.Monitors {
|
||||
if monitor.ShouldCheck() {
|
||||
success, alertNotice := monitor.Check()
|
||||
|
||||
hasAlert := alertNotice != nil
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, success)
|
||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
||||
|
||||
// Should probably consider refactoring everything below here
|
||||
if alertNotice != nil {
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
|
||||
}
|
||||
func SendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
|
||||
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
|
||||
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
||||
|
||||
if alertNames == nil {
|
||||
// This should only happen for a recovery alert. AlertDown is validated not empty
|
||||
log.Printf(
|
||||
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||
slog.Warningf(
|
||||
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||
alertNotice.MonitorName, alertNotice.IsUp,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, alertName := range alertNames {
|
||||
if alert, ok := config.Alerts[alertName]; ok {
|
||||
if alert, ok := config.GetAlert(alertName); ok {
|
||||
output, err := alert.Send(*alertNotice)
|
||||
if err != nil {
|
||||
log.Printf(
|
||||
"ERROR: Alert '%s' failed. result=%v: output=%s",
|
||||
slog.Errorf(
|
||||
"Alert '%s' failed. result=%v: output=%s",
|
||||
alert.Name,
|
||||
err,
|
||||
output,
|
||||
)
|
||||
return fmt.Errorf(
|
||||
"Unsuccessfully triggered alert '%s'. "+
|
||||
"Crashing to avoid false negatives: %v",
|
||||
alert.Name,
|
||||
err,
|
||||
)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Count alert metrics
|
||||
Metrics.CountAlert(monitor.Name, alert.Name)
|
||||
} else {
|
||||
// This case should never actually happen since we validate against it
|
||||
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
|
||||
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func CheckMonitors(config *Config) error {
|
||||
// TODO: Run this in goroutines and capture exceptions
|
||||
for _, monitor := range config.Monitors {
|
||||
if monitor.ShouldCheck() {
|
||||
success, alertNotice := monitor.Check()
|
||||
hasAlert := alertNotice != nil
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
|
||||
|
||||
if alertNotice != nil {
|
||||
err := SendAlerts(config, monitor, alertNotice)
|
||||
// If there was an error in sending an alert, exit early and bubble it up
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -79,39 +89,75 @@ func checkMonitors(config *Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func SendStartupAlerts(config *Config, alertNames []string) error {
|
||||
for _, alertName := range alertNames {
|
||||
var err error
|
||||
|
||||
alert, ok := config.GetAlert(alertName)
|
||||
if !ok {
|
||||
err = fmt.Errorf("unknown alert %s: %w", alertName, errUnknownAlert)
|
||||
}
|
||||
|
||||
if err == nil {
|
||||
_, err = alert.Send(AlertNotice{
|
||||
AlertCount: 0,
|
||||
FailureCount: 0,
|
||||
IsUp: true,
|
||||
LastSuccess: time.Now(),
|
||||
MonitorName: fmt.Sprintf("First Run Alert Test: %s", alert.Name),
|
||||
LastCheckOutput: "",
|
||||
})
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Get debug flag
|
||||
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
|
||||
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
||||
startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
|
||||
|
||||
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
|
||||
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
||||
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
|
||||
flag.Parse()
|
||||
|
||||
// Print version if flag is provided
|
||||
if *showVersion {
|
||||
log.Println("Minitor version:", version)
|
||||
fmt.Println("Minitor version:", version)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Load configuration
|
||||
config, err := LoadConfig("config.yml")
|
||||
if err != nil {
|
||||
log.Fatalf("Error loading config: %v", err)
|
||||
}
|
||||
config, err := LoadConfig(*configPath)
|
||||
slog.OnErrFatalf(err, "Error loading config: %v", err)
|
||||
|
||||
// Serve metrics exporter, if specified
|
||||
if ExportMetrics {
|
||||
log.Println("INFO: Exporting metrics to Prometheus")
|
||||
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
|
||||
|
||||
go ServeMetrics()
|
||||
}
|
||||
|
||||
if *startupAlerts != "" {
|
||||
alertNames := strings.Split(*startupAlerts, ",")
|
||||
|
||||
err = SendStartupAlerts(&config, alertNames)
|
||||
|
||||
slog.OnErrPanicf(err, "Error running startup alerts")
|
||||
}
|
||||
|
||||
// Start main loop
|
||||
for {
|
||||
err = checkMonitors(&config)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
err = CheckMonitors(&config)
|
||||
slog.OnErrPanicf(err, "Error checking monitors")
|
||||
|
||||
sleepTime := time.Duration(config.CheckInterval) * time.Second
|
||||
time.Sleep(sleepTime)
|
||||
time.Sleep(config.CheckInterval)
|
||||
}
|
||||
}
|
||||
|
||||
+152
-74
@@ -1,114 +1,192 @@
|
||||
package main
|
||||
package main_test
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
|
||||
m "git.iamthefij.com/iamthefij/minitor-go"
|
||||
)
|
||||
|
||||
func Ptr[T any](v T) *T {
|
||||
return &v
|
||||
}
|
||||
|
||||
// TestCheckMonitors tests the CheckMonitors function
|
||||
// It also tests results for potentially invalid configuration. For example, no alerts
|
||||
func TestCheckMonitors(t *testing.T) {
|
||||
cases := []struct {
|
||||
config Config
|
||||
expectErr bool
|
||||
config m.Config
|
||||
expectFailureError bool
|
||||
expectRecoverError bool
|
||||
name string
|
||||
}{
|
||||
{
|
||||
config: Config{},
|
||||
expectErr: false,
|
||||
name: "Empty",
|
||||
},
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Monitors: []*m.Monitor{
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
Name: "Success",
|
||||
Command: []string{"true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
name: "Monitor success, no alerts",
|
||||
expectFailureError: false,
|
||||
expectRecoverError: false,
|
||||
name: "No alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Monitors: []*m.Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
Command: []string{"false"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
&Monitor{
|
||||
Name: "Failure",
|
||||
Command: []string{"false"},
|
||||
AlertDown: []string{"unknown"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
name: "Monitor failure, no and unknown alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
Name: "Success",
|
||||
Command: []string{"ls"},
|
||||
alertCount: 1,
|
||||
},
|
||||
&Monitor{
|
||||
Name: "Success",
|
||||
Command: []string{"true"},
|
||||
AlertUp: []string{"unknown"},
|
||||
alertCount: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
name: "Monitor recovery, no alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
Name: "Failure",
|
||||
Command: []string{"false"},
|
||||
AlertDown: []string{"good"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
Alerts: map[string]*Alert{
|
||||
"good": &Alert{
|
||||
},
|
||||
expectFailureError: true,
|
||||
expectRecoverError: true,
|
||||
name: "Unknown alerts",
|
||||
},
|
||||
{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Monitors: []*m.Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
AlertDown: []string{"good"},
|
||||
AlertUp: []string{"good"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
Alerts: []*m.Alert{{
|
||||
Name: "good",
|
||||
Command: []string{"true"},
|
||||
}},
|
||||
},
|
||||
expectFailureError: false,
|
||||
expectRecoverError: false,
|
||||
name: "Successful alert",
|
||||
},
|
||||
{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Monitors: []*m.Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
AlertDown: []string{"bad"},
|
||||
AlertUp: []string{"bad"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
Alerts: []*m.Alert{{
|
||||
Name: "bad",
|
||||
Command: []string{"false"},
|
||||
}},
|
||||
},
|
||||
expectFailureError: true,
|
||||
expectRecoverError: true,
|
||||
name: "Failing alert",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
err := c.config.Init()
|
||||
if err != nil {
|
||||
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
|
||||
}
|
||||
|
||||
for _, check := range []struct {
|
||||
shellCmd string
|
||||
name string
|
||||
expectErr bool
|
||||
}{
|
||||
{"false", "Failure", c.expectFailureError}, {"true", "Success", c.expectRecoverError},
|
||||
} {
|
||||
// Set the shell command for this check
|
||||
c.config.Monitors[0].ShellCommand = check.shellCmd
|
||||
|
||||
// Run the check
|
||||
err = m.CheckMonitors(&c.config)
|
||||
|
||||
// Check the results
|
||||
if err == nil && check.expectErr {
|
||||
t.Errorf("checkMonitors(%s:%s): Expected error, the code did not error", c.name, check.name)
|
||||
} else if err != nil && !check.expectErr {
|
||||
t.Errorf("checkMonitors(%s:%s): Did not expect an error, but we got one anyway: %v", c.name, check.name, err)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestFirstRunAlerts(t *testing.T) {
|
||||
cases := []struct {
|
||||
config m.Config
|
||||
expectErr bool
|
||||
startupAlerts []string
|
||||
name string
|
||||
}{
|
||||
{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
},
|
||||
expectErr: true,
|
||||
startupAlerts: []string{"missing"},
|
||||
name: "Unknown",
|
||||
},
|
||||
{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Alerts: []*m.Alert{
|
||||
{
|
||||
Name: "good",
|
||||
Command: []string{"true"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
name: "Monitor failure, successful alert",
|
||||
startupAlerts: []string{"good"},
|
||||
name: "Successful alert",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
Name: "Failure",
|
||||
Command: []string{"false"},
|
||||
AlertDown: []string{"bad"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
Alerts: map[string]*Alert{
|
||||
"bad": &Alert{
|
||||
config: m.Config{
|
||||
CheckIntervalStr: "1s",
|
||||
Alerts: []*m.Alert{
|
||||
{
|
||||
Name: "bad",
|
||||
Command: []string{"false"},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
name: "Monitor failure, bad alert",
|
||||
startupAlerts: []string{"bad"},
|
||||
name: "Failed alert",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
c.config.Init()
|
||||
err := checkMonitors(&c.config)
|
||||
if err == nil && c.expectErr {
|
||||
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
err := c.config.Init()
|
||||
if err != nil {
|
||||
t.Errorf("sendFirstRunAlerts(%s): unexpected error reading config: %v", c.name, err)
|
||||
}
|
||||
|
||||
err = m.SendStartupAlerts(&c.config, c.startupAlerts)
|
||||
if err == nil && c.expectErr {
|
||||
t.Errorf("sendFirstRunAlerts(%s): Expected error, the code did not error", c.name)
|
||||
} else if err != nil && !c.expectErr {
|
||||
t.Errorf("sendFirstRunAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}}
|
||||
{{#if build.tags}}
|
||||
tags:
|
||||
{{#each build.tags}}
|
||||
- {{this}}
|
||||
{{/each}}
|
||||
{{/if}}
|
||||
manifests:
|
||||
-
|
||||
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64
|
||||
platform:
|
||||
architecture: amd64
|
||||
os: linux
|
||||
-
|
||||
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64
|
||||
platform:
|
||||
architecture: arm64
|
||||
os: linux
|
||||
variant: v8
|
||||
-
|
||||
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm
|
||||
platform:
|
||||
architecture: arm
|
||||
os: linux
|
||||
variant: v7
|
||||
+17
-1
@@ -19,6 +19,7 @@ import (
|
||||
type MinitorMetrics struct {
|
||||
alertCount *prometheus.CounterVec
|
||||
checkCount *prometheus.CounterVec
|
||||
checkTime *prometheus.GaugeVec
|
||||
monitorStatus *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
@@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
|
||||
},
|
||||
[]string{"monitor", "status", "is_alert"},
|
||||
),
|
||||
checkTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_check_milliseconds",
|
||||
Help: "Time in miliseconds that a check ran for",
|
||||
},
|
||||
[]string{"monitor", "status"},
|
||||
),
|
||||
monitorStatus: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_monitor_up_count",
|
||||
@@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
|
||||
// Register newly created metrics
|
||||
prometheus.MustRegister(metrics.alertCount)
|
||||
prometheus.MustRegister(metrics.checkCount)
|
||||
prometheus.MustRegister(metrics.checkTime)
|
||||
prometheus.MustRegister(metrics.monitorStatus)
|
||||
|
||||
return metrics
|
||||
@@ -63,11 +72,12 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
||||
if isUp {
|
||||
val = 1.0
|
||||
}
|
||||
|
||||
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
||||
}
|
||||
|
||||
// CountCheck counts the result of a particular Monitor check
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
|
||||
status := "failure"
|
||||
if isSuccess {
|
||||
status = "success"
|
||||
@@ -81,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
|
||||
metrics.checkCount.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||
).Inc()
|
||||
|
||||
metrics.checkTime.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status},
|
||||
).Set(float64(ms))
|
||||
}
|
||||
|
||||
// CountAlert counts an alert
|
||||
@@ -96,6 +110,8 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
|
||||
// ServeMetrics starts an http server with a Prometheus metrics handler
|
||||
func ServeMetrics() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
host := fmt.Sprintf(":%d", MetricsPort)
|
||||
|
||||
_ = http.ListenAndServe(host, nil)
|
||||
}
|
||||
|
||||
+75
-60
@@ -1,50 +1,67 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
// Monitor represents a particular periodic check of a command
|
||||
type Monitor struct {
|
||||
type Monitor struct { //nolint:maligned
|
||||
// Config values
|
||||
Name string
|
||||
Command []string
|
||||
CommandShell string `yaml:"command_shell"`
|
||||
AlertDown []string `yaml:"alert_down"`
|
||||
AlertUp []string `yaml:"alert_up"`
|
||||
CheckInterval float64 `yaml:"check_interval"`
|
||||
AlertAfter int16 `yaml:"alert_after"`
|
||||
AlertEvery int16 `yaml:"alert_every"`
|
||||
CheckIntervalStr *string `hcl:"check_interval,optional"`
|
||||
CheckInterval time.Duration
|
||||
|
||||
Name string `hcl:"name,label"`
|
||||
AlertAfter int `hcl:"alert_after,optional"`
|
||||
AlertEvery *int `hcl:"alert_every,optional"`
|
||||
AlertDown []string `hcl:"alert_down,optional"`
|
||||
AlertUp []string `hcl:"alert_up,optional"`
|
||||
Command []string `hcl:"command,optional"`
|
||||
ShellCommand string `hcl:"shell_command,optional"`
|
||||
|
||||
// Other values
|
||||
alertCount int
|
||||
failureCount int
|
||||
lastCheck time.Time
|
||||
lastOutput string
|
||||
alertCount int16
|
||||
failureCount int16
|
||||
lastSuccess time.Time
|
||||
lastOutput string
|
||||
lastCheckDuration time.Duration
|
||||
}
|
||||
|
||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||
// configured
|
||||
func (monitor Monitor) IsValid() bool {
|
||||
atLeastOneCommand := (monitor.CommandShell != "" || monitor.Command != nil)
|
||||
atMostOneCommand := (monitor.CommandShell == "" || monitor.Command == nil)
|
||||
return (atLeastOneCommand &&
|
||||
atMostOneCommand &&
|
||||
monitor.getAlertAfter() > 0 &&
|
||||
monitor.AlertDown != nil)
|
||||
// TODO: Refactor and return an error containing more information on what was invalid
|
||||
hasCommand := len(monitor.Command) > 0
|
||||
hasShellCommand := monitor.ShellCommand != ""
|
||||
hasValidAlertAfter := monitor.AlertAfter > 0
|
||||
hasAlertDown := len(monitor.AlertDown) > 0
|
||||
|
||||
hasAtLeastOneCommand := hasCommand || hasShellCommand
|
||||
hasAtMostOneCommand := !(hasCommand && hasShellCommand)
|
||||
|
||||
return hasAtLeastOneCommand &&
|
||||
hasAtMostOneCommand &&
|
||||
hasValidAlertAfter &&
|
||||
hasAlertDown
|
||||
}
|
||||
|
||||
func (monitor Monitor) LastOutput() string {
|
||||
return monitor.lastOutput
|
||||
}
|
||||
|
||||
// ShouldCheck returns a boolean indicating if the Monitor is ready to be
|
||||
// checked again
|
||||
func (monitor Monitor) ShouldCheck() bool {
|
||||
if monitor.lastCheck.IsZero() {
|
||||
if monitor.lastCheck.IsZero() || monitor.CheckInterval == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
|
||||
sinceLastCheck := time.Since(monitor.lastCheck)
|
||||
|
||||
return sinceLastCheck >= monitor.CheckInterval
|
||||
}
|
||||
|
||||
@@ -52,17 +69,22 @@ func (monitor Monitor) ShouldCheck() bool {
|
||||
// and a possible AlertNotice
|
||||
func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
var cmd *exec.Cmd
|
||||
if monitor.Command != nil {
|
||||
if len(monitor.Command) > 0 {
|
||||
cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
|
||||
} else if monitor.ShellCommand != "" {
|
||||
cmd = ShellCommand(monitor.ShellCommand)
|
||||
} else {
|
||||
cmd = ShellCommand(monitor.CommandShell)
|
||||
slog.Fatalf("Monitor %s has no command configured", monitor.Name)
|
||||
}
|
||||
|
||||
checkStartTime := time.Now()
|
||||
output, err := cmd.CombinedOutput()
|
||||
monitor.lastCheck = time.Now()
|
||||
monitor.lastOutput = string(output)
|
||||
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
|
||||
|
||||
var alertNotice *AlertNotice
|
||||
|
||||
isSuccess := (err == nil)
|
||||
if isSuccess {
|
||||
alertNotice = monitor.success()
|
||||
@@ -70,17 +92,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
alertNotice = monitor.failure()
|
||||
}
|
||||
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
|
||||
}
|
||||
if err != nil {
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command result: %v", err)
|
||||
}
|
||||
}
|
||||
slog.Debugf("Command output: %s", monitor.lastOutput)
|
||||
slog.OnErrWarnf(err, "Command result: %v", err)
|
||||
|
||||
log.Printf(
|
||||
"INFO: %s success=%t, alert=%t",
|
||||
slog.Infof(
|
||||
"%s success=%t, alert=%t",
|
||||
monitor.Name,
|
||||
isSuccess,
|
||||
alertNotice != nil,
|
||||
@@ -89,15 +105,22 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
return isSuccess, alertNotice
|
||||
}
|
||||
|
||||
func (monitor Monitor) isUp() bool {
|
||||
// IsUp returns the status of the current monitor
|
||||
func (monitor Monitor) IsUp() bool {
|
||||
return monitor.alertCount == 0
|
||||
}
|
||||
|
||||
// LastCheckMilliseconds gives number of milliseconds the last check ran for
|
||||
func (monitor Monitor) LastCheckMilliseconds() int64 {
|
||||
return monitor.lastCheckDuration.Milliseconds()
|
||||
}
|
||||
|
||||
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||
if !monitor.isUp() {
|
||||
if !monitor.IsUp() {
|
||||
// Alert that we have recovered
|
||||
notice = monitor.createAlertNotice(true)
|
||||
}
|
||||
|
||||
monitor.failureCount = 0
|
||||
monitor.alertCount = 0
|
||||
monitor.lastSuccess = time.Now()
|
||||
@@ -108,36 +131,36 @@ func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||
func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
monitor.failureCount++
|
||||
// If we haven't hit the minimum failures, we can exit
|
||||
if monitor.failureCount < monitor.getAlertAfter() {
|
||||
if LogDebug {
|
||||
log.Printf(
|
||||
"DEBUG: %s failed but did not hit minimum failures. "+
|
||||
if monitor.failureCount < monitor.AlertAfter {
|
||||
slog.Debugf(
|
||||
"%s failed but did not hit minimum failures. "+
|
||||
"Count: %v alert after: %v",
|
||||
monitor.Name,
|
||||
monitor.failureCount,
|
||||
monitor.getAlertAfter(),
|
||||
monitor.AlertAfter,
|
||||
)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Take number of failures after minimum
|
||||
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
||||
failureCount := (monitor.failureCount - monitor.AlertAfter)
|
||||
|
||||
// Use alert cadence to determine if we should alert
|
||||
if monitor.AlertEvery > 0 {
|
||||
// Handle integer number of failures before alerting
|
||||
if failureCount%monitor.AlertEvery == 0 {
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
} else if monitor.AlertEvery == 0 {
|
||||
switch {
|
||||
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
|
||||
// Handle alerting on first failure only
|
||||
if failureCount == 0 {
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
} else {
|
||||
case *monitor.AlertEvery > 0:
|
||||
// Handle integer number of failures before alerting
|
||||
if failureCount%*monitor.AlertEvery == 0 {
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
default:
|
||||
// Handle negative numbers indicating an exponential backoff
|
||||
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
|
||||
if failureCount >= int(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
}
|
||||
@@ -147,16 +170,7 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
monitor.alertCount++
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (monitor Monitor) getAlertAfter() int16 {
|
||||
// TODO: Come up with a better way than this method
|
||||
// Zero is one!
|
||||
if monitor.AlertAfter == 0 {
|
||||
return 1
|
||||
}
|
||||
return monitor.AlertAfter
|
||||
return notice
|
||||
}
|
||||
|
||||
// GetAlertNames gives a list of alert names for a given monitor status
|
||||
@@ -164,6 +178,7 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
|
||||
if up {
|
||||
return monitor.AlertUp
|
||||
}
|
||||
|
||||
return monitor.AlertDown
|
||||
}
|
||||
|
||||
|
||||
+120
-139
@@ -1,164 +1,152 @@
|
||||
package main
|
||||
package main_test
|
||||
|
||||
import (
|
||||
"log"
|
||||
"reflect"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
m "git.iamthefij.com/iamthefij/minitor-go"
|
||||
)
|
||||
|
||||
// TestMonitorIsValid tests the Monitor.IsValid()
|
||||
func TestMonitorIsValid(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
monitor m.Monitor
|
||||
expected bool
|
||||
name string
|
||||
}{
|
||||
{Monitor{Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, true, "Command only"},
|
||||
{Monitor{CommandShell: "echo test", AlertDown: []string{"log"}}, true, "CommandShell only"},
|
||||
{Monitor{Command: []string{"echo", "test"}}, false, "No AlertDown"},
|
||||
{Monitor{AlertDown: []string{"log"}}, false, "No commands"},
|
||||
{
|
||||
Monitor{Command: []string{"echo", "test"}, CommandShell: "echo test", AlertDown: []string{"log"}},
|
||||
false,
|
||||
"Both commands",
|
||||
},
|
||||
{Monitor{Command: []string{"echo", "test"}, AlertDown: []string{"log"}, AlertAfter: -1}, false, "Invalid alert threshold, -1"},
|
||||
{m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, true, "Command only"},
|
||||
{m.Monitor{AlertAfter: 1, ShellCommand: "echo test", AlertDown: []string{"log"}}, true, "CommandShell only"},
|
||||
{m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}}, false, "No AlertDown"},
|
||||
{m.Monitor{AlertAfter: 1, AlertDown: []string{"log"}}, false, "No commands"},
|
||||
{m.Monitor{AlertAfter: -1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, false, "Invalid alert threshold, -1"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
actual := c.monitor.IsValid()
|
||||
if actual != c.expected {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorShouldCheck tests the Monitor.ShouldCheck()
|
||||
func TestMonitorShouldCheck(t *testing.T) {
|
||||
timeNow := time.Now()
|
||||
timeTenSecAgo := time.Now().Add(time.Second * -10)
|
||||
timeTwentySecAgo := time.Now().Add(time.Second * -20)
|
||||
t.Parallel()
|
||||
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
expected bool
|
||||
name string
|
||||
}{
|
||||
{Monitor{}, true, "Empty"},
|
||||
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
|
||||
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
|
||||
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
|
||||
// Create a monitor that should check every second and then verify it checks with some sleeps
|
||||
monitor := m.Monitor{ShellCommand: "true", CheckInterval: time.Second}
|
||||
|
||||
if !monitor.ShouldCheck() {
|
||||
t.Errorf("New monitor should be ready to check")
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
actual := c.monitor.ShouldCheck()
|
||||
if actual != c.expected {
|
||||
t.Errorf("ShouldCheck(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
monitor.Check()
|
||||
|
||||
if monitor.ShouldCheck() {
|
||||
t.Errorf("Monitor should not be ready to check after a check")
|
||||
}
|
||||
|
||||
time.Sleep(time.Second)
|
||||
|
||||
if !monitor.ShouldCheck() {
|
||||
t.Errorf("Monitor should be ready to check after a second")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorIsUp tests the Monitor.isUp()
|
||||
// TestMonitorIsUp tests the Monitor.IsUp()
|
||||
func TestMonitorIsUp(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
expected bool
|
||||
name string
|
||||
}{
|
||||
{Monitor{}, true, "Empty"},
|
||||
{Monitor{alertCount: 1}, false, "Has alert"},
|
||||
{Monitor{alertCount: -1}, false, "Negative alerts"},
|
||||
{Monitor{alertCount: 0}, true, "No alerts"},
|
||||
t.Parallel()
|
||||
|
||||
// Creating a monitor that should alert after 2 failures. The monitor should be considered up until we reach two failed checks
|
||||
monitor := m.Monitor{ShellCommand: "false", AlertAfter: 2}
|
||||
if !monitor.IsUp() {
|
||||
t.Errorf("New monitor should be considered up")
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
actual := c.monitor.isUp()
|
||||
if actual != c.expected {
|
||||
t.Errorf("isUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
monitor.Check()
|
||||
|
||||
if !monitor.IsUp() {
|
||||
t.Errorf("Monitor should be considered up with one failure and no alerts")
|
||||
}
|
||||
log.Println("-----")
|
||||
|
||||
monitor.Check()
|
||||
|
||||
if monitor.IsUp() {
|
||||
t.Errorf("Monitor should be considered down with one alert")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorGetAlertNames tests that proper alert names are returned
|
||||
func TestMonitorGetAlertNames(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
monitor m.Monitor
|
||||
up bool
|
||||
expected []string
|
||||
name string
|
||||
}{
|
||||
{Monitor{}, true, nil, "Empty up"},
|
||||
{Monitor{}, false, nil, "Empty down"},
|
||||
{Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"},
|
||||
{Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"},
|
||||
{m.Monitor{}, true, nil, "Empty up"},
|
||||
{m.Monitor{}, false, nil, "Empty down"},
|
||||
{m.Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"},
|
||||
{m.Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
actual := c.monitor.GetAlertNames(c.up)
|
||||
if !EqualSliceString(actual, c.expected) {
|
||||
if !reflect.DeepEqual(actual, c.expected) {
|
||||
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorSuccess tests the Monitor.success()
|
||||
func TestMonitorSuccess(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
expectNotice bool
|
||||
name string
|
||||
}{
|
||||
{Monitor{}, false, "Empty"},
|
||||
{Monitor{alertCount: 0}, false, "No alerts"},
|
||||
{Monitor{alertCount: 1}, true, "Has alert"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
notice := c.monitor.success()
|
||||
hasNotice := (notice != nil)
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
|
||||
// hitting the threshold provided by AlertAfter
|
||||
func TestMonitorFailureAlertAfter(t *testing.T) {
|
||||
var alertEveryOne int = 1
|
||||
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
monitor m.Monitor
|
||||
numChecks int
|
||||
expectNotice bool
|
||||
name string
|
||||
}{
|
||||
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter is 1 and AlertEvery defaults to 0
|
||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
|
||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
|
||||
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
|
||||
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
|
||||
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1}, 1, true, "Empty After 1"}, // Defaults to true because AlertAfter is 1 and AlertEvery is unset
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 1, true, "Alert after 1: first failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 2, true, "Alert after 1: second failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 1, false, "Alert after 20: first failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 20, true, "Alert after 20: 20th failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 21, true, "Alert after 20: 21st failure"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
notice := c.monitor.failure()
|
||||
hasNotice := (notice != nil)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
hasNotice := false
|
||||
|
||||
for i := 0; i < c.numChecks; i++ {
|
||||
_, notice := c.monitor.Check()
|
||||
hasNotice = (notice != nil)
|
||||
}
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -166,51 +154,42 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
|
||||
// on the expected intervals
|
||||
func TestMonitorFailureAlertEvery(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
expectNotice bool
|
||||
monitor m.Monitor
|
||||
expectedNotice []bool
|
||||
name string
|
||||
}{
|
||||
/*
|
||||
TODO: Actually found a bug in original implementation. There is an inconsistency in the way AlertAfter is treated.
|
||||
For "First alert only" (ie. AlertEvery=0), it is the number of failures to ignore before alerting, so AlertAfter=1
|
||||
will ignore the first failure and alert on the second failure
|
||||
For other intervals (ie. AlertEvery=1), it is essentially indexed on one. Essentially making AlertAfter=1 trigger
|
||||
on the first failure.
|
||||
|
||||
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
|
||||
*/
|
||||
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1}, []bool{true}, "No AlertEvery set"}, // Defaults to true because AlertAfter and AlertEvery default to nil
|
||||
// Alert first time only, after 1
|
||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
|
||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
|
||||
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(0)}, []bool{true, false, false}, "Alert first time only after 1"},
|
||||
// Alert every time, after 1
|
||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
|
||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
|
||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(1)}, []bool{true, true, true}, "Alert every time after 1"},
|
||||
// Alert every other time, after 1
|
||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
|
||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
|
||||
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
|
||||
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
|
||||
{m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(2)}, []bool{true, false, true, false}, "Alert every other time after 1"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
notice := c.monitor.failure()
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
for i, expectNotice := range c.expectedNotice {
|
||||
_, notice := c.monitor.Check()
|
||||
hasNotice := (notice != nil)
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
|
||||
if hasNotice != expectNotice {
|
||||
t.Errorf("failed %s check %d: expected=%t actual=%t", c.name, i, expectNotice, hasNotice)
|
||||
}
|
||||
log.Println("-----")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorFailureExponential tests that alerts will trigger
|
||||
// with an exponential backoff after repeated failures
|
||||
func TestMonitorFailureExponential(t *testing.T) {
|
||||
var alertEveryExp int = -1
|
||||
|
||||
cases := []struct {
|
||||
expectNotice bool
|
||||
name string
|
||||
@@ -227,17 +206,18 @@ func TestMonitorFailureExponential(t *testing.T) {
|
||||
|
||||
// Unlike previous tests, this one requires a static Monitor with repeated
|
||||
// calls to the failure method
|
||||
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
monitor := m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryExp}
|
||||
|
||||
notice := monitor.failure()
|
||||
for _, c := range cases {
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
// NOTE: These tests are not parallel because they rely on the state of the Monitor
|
||||
_, notice := monitor.Check()
|
||||
hasNotice := (notice != nil)
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -248,53 +228,54 @@ func TestMonitorCheck(t *testing.T) {
|
||||
hasNotice bool
|
||||
lastOutput string
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
monitor m.Monitor
|
||||
expect expected
|
||||
name string
|
||||
}{
|
||||
{
|
||||
Monitor{Command: []string{"echo", "success"}},
|
||||
m.Monitor{AlertAfter: 1, Command: []string{"echo", "success"}},
|
||||
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
|
||||
"Test successful command",
|
||||
},
|
||||
{
|
||||
Monitor{CommandShell: "echo success"},
|
||||
m.Monitor{AlertAfter: 1, ShellCommand: "echo success"},
|
||||
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
|
||||
"Test successful command shell",
|
||||
},
|
||||
{
|
||||
Monitor{Command: []string{"total", "failure"}},
|
||||
m.Monitor{AlertAfter: 1, Command: []string{"total", "failure"}},
|
||||
expected{isSuccess: false, hasNotice: true, lastOutput: ""},
|
||||
"Test failed command",
|
||||
},
|
||||
{
|
||||
Monitor{CommandShell: "false"},
|
||||
m.Monitor{AlertAfter: 1, ShellCommand: "false"},
|
||||
expected{isSuccess: false, hasNotice: true, lastOutput: ""},
|
||||
"Test failed command shell",
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c := c
|
||||
|
||||
t.Run(c.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
isSuccess, notice := c.monitor.Check()
|
||||
if isSuccess != c.expect.isSuccess {
|
||||
t.Errorf("Check(%v) (success), expected=%t actual=%t", c.name, c.expect.isSuccess, isSuccess)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
hasNotice := (notice != nil)
|
||||
if hasNotice != c.expect.hasNotice {
|
||||
t.Errorf("Check(%v) (notice), expected=%t actual=%t", c.name, c.expect.hasNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
lastOutput := c.monitor.lastOutput
|
||||
lastOutput := c.monitor.LastOutput()
|
||||
if lastOutput != c.expect.lastOutput {
|
||||
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Println("-----")
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
check_interval = "5s"
|
||||
|
||||
monitor "Fake Website" {
|
||||
command = ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
|
||||
alert_down = ["log_down", "mailgun_down", "sms_down"]
|
||||
alert_up = ["log_up", "email_up"]
|
||||
check_interval = "10s" # Must be at minimum the global `check_interval`
|
||||
alert_after = 3
|
||||
alert_every = -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
}
|
||||
|
||||
monitor "Real Website" {
|
||||
command = ["curl", "-s", "-o", "/dev/null", "https://google.com"]
|
||||
alert_down = ["log_down", "mailgun_down", "sms_down"]
|
||||
alert_up = ["log_up", "email_up"]
|
||||
check_interval = "5s"
|
||||
alert_after = 3
|
||||
alert_every = -1
|
||||
}
|
||||
|
||||
alert "log_down" {
|
||||
command = ["echo", "Minitor failure for {{.MonitorName}}"]
|
||||
}
|
||||
|
||||
alert "log_up" {
|
||||
command = ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||
}
|
||||
|
||||
alert "email_up" {
|
||||
command = ["sendmail", "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||
}
|
||||
|
||||
alert "mailgun_down" {
|
||||
shell_command = <<-EOF
|
||||
curl -s -X POST \
|
||||
-F subject="Alert! {{.MonitorName}} failed" \
|
||||
-F from="Minitor <minitor@minitor.mon>" \
|
||||
-F to=me@minitor.mon \
|
||||
-F text="Our monitor failed" \
|
||||
https://api.mailgun.net/v3/minitor.mon/messages \
|
||||
-u "api:${MAILGUN_API_KEY}"
|
||||
EOF
|
||||
}
|
||||
|
||||
alert "sms_down" {
|
||||
shell_command = <<-EOF
|
||||
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed" \
|
||||
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}" \
|
||||
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages" \
|
||||
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
|
||||
EOF
|
||||
}
|
||||
@@ -1,41 +0,0 @@
|
||||
---
|
||||
check_interval: 5
|
||||
|
||||
monitors:
|
||||
- name: Fake Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 10 # Must be at minimum the global `check_interval`
|
||||
alert_after: 3
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
- name: Real Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 5
|
||||
alert_after: 3
|
||||
alert_every: -1
|
||||
|
||||
alerts:
|
||||
log_down:
|
||||
command: ["echo", "Minitor failure for {{.MonitorName}}"]
|
||||
log_up:
|
||||
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||
email_up:
|
||||
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||
mailgun_down:
|
||||
command_shell: >
|
||||
curl -s -X POST
|
||||
-F subject="Alert! {{.MonitorName}} failed"
|
||||
-F from="Minitor <minitor@minitor.mon>"
|
||||
-F to=me@minitor.mon
|
||||
-F text="Our monitor failed"
|
||||
https://api.mailgun.net/v3/minitor.mon/messages
|
||||
-u "api:${MAILGUN_API_KEY}"
|
||||
sms_down:
|
||||
command_shell: >
|
||||
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed"
|
||||
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
|
||||
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
|
||||
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
|
||||
Executable
+6
@@ -0,0 +1,6 @@
|
||||
#! /bin/sh
|
||||
|
||||
# Used for a basic HTTP health check
|
||||
# Avoids output from non-errors and will fail if the HTTP response is unsuccessful
|
||||
|
||||
curl --silent --show-error --fail -o /dev/null "$@"
|
||||
+15
-3
@@ -11,6 +11,7 @@ set -e
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
num_log_lines="$2"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
@@ -31,21 +32,32 @@ function get_container_id {
|
||||
|
||||
# Returns container JSON
|
||||
function inspect_container {
|
||||
local container_id=$1
|
||||
local container_id="$1"
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
# Gets some lines from docker log
|
||||
function get_logs {
|
||||
container_id="$1"
|
||||
num_lines="$2"
|
||||
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
|
||||
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Usage: $0 container_name [num_log_lines]"
|
||||
echo "Will exit with the last status code of continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
container_id=$(get_container_id $container_name)
|
||||
container_id=$(get_container_id "$container_name")
|
||||
if [ -z "$container_id" ]; then
|
||||
echo "ERROR: Could not find container with name: $container_name"
|
||||
exit 1
|
||||
fi
|
||||
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
|
||||
|
||||
if [ -n "$num_log_lines" ]; then
|
||||
get_logs "$container_id" "$num_log_lines"
|
||||
fi
|
||||
|
||||
exit "$exit_code"
|
||||
|
||||
@@ -11,6 +11,7 @@ set -e
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
num_log_lines="$2"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
@@ -35,8 +36,15 @@ function inspect_container {
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
# Gets some lines from docker log
|
||||
function get_logs {
|
||||
container_id="$1"
|
||||
num_lines="$2"
|
||||
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
|
||||
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Usage: $0 container_name [num_log_lines]"
|
||||
echo "Will return results of healthcheck for continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
@@ -48,6 +56,10 @@ if [ -z "$container_id" ]; then
|
||||
fi
|
||||
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
|
||||
|
||||
if [ -n "$num_log_lines" ]; then
|
||||
get_logs "$container_id" "$num_log_lines"
|
||||
fi
|
||||
|
||||
case "$health" in
|
||||
null)
|
||||
echo "No healthcheck results"
|
||||
|
||||
@@ -0,0 +1,7 @@
|
||||
check_interval = "1s"
|
||||
|
||||
monitor "Command" {
|
||||
command = ["echo", "$PATH"]
|
||||
alert_down = [ "alert_down", "log_shell", "log_command" ]
|
||||
alert_every = 0
|
||||
}
|
||||
@@ -1,8 +0,0 @@
|
||||
check_interval: 1
|
||||
|
||||
monitors:
|
||||
- name: Command
|
||||
command: ['echo', '$PATH']
|
||||
alert_down: [ 'alert_down', 'log_shell', 'log_command' ]
|
||||
# alert_every: -1
|
||||
alert_every: 0
|
||||
@@ -0,0 +1 @@
|
||||
check_interval = "woops, I'm not an int!"
|
||||
@@ -1 +0,0 @@
|
||||
check_interval: woops, I'm not an int!
|
||||
@@ -0,0 +1,12 @@
|
||||
check_interval = "1s"
|
||||
|
||||
monitor "Command" {
|
||||
command = ["echo", "$PATH"]
|
||||
alert_down = ["not_log"]
|
||||
alert_every = 0
|
||||
}
|
||||
|
||||
|
||||
alert "log" {
|
||||
command = ["true"]
|
||||
}
|
||||
@@ -1,13 +0,0 @@
|
||||
check_interval: 1
|
||||
|
||||
monitors:
|
||||
- name: Command
|
||||
command: ['echo', '$PATH']
|
||||
alert_down: [ 'not_log']
|
||||
# alert_every: -1
|
||||
alert_every: 0
|
||||
|
||||
|
||||
alerts:
|
||||
log:
|
||||
command: ['true']
|
||||
@@ -0,0 +1,11 @@
|
||||
check_interval = "1s"
|
||||
default_alert_down = ["log_command"]
|
||||
default_alert_after = 1
|
||||
|
||||
monitor "Command" {
|
||||
command = ["echo", "$PATH"]
|
||||
}
|
||||
|
||||
alert "log_command" {
|
||||
command = ["echo", "default", "'command!!!'", "{{.MonitorName}}"]
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
check_interval = "1s"
|
||||
|
||||
alert "log_command" {
|
||||
command = ["echo", "regular", "'command!!!'", "{{.MonitorName}}"]
|
||||
}
|
||||
|
||||
alert "log_shell" {
|
||||
shell_command = "echo \"Failure on {{.MonitorName}} User is $USER\""
|
||||
}
|
||||
|
||||
monitor "Command" {
|
||||
command = ["echo", "$PATH"]
|
||||
alert_down = ["log_command", "log_shell"]
|
||||
alert_every = 2
|
||||
check_interval = "10s"
|
||||
}
|
||||
|
||||
monitor "Shell" {
|
||||
shell_command = <<-EOF
|
||||
echo 'Some string with stuff'
|
||||
echo 'another line'
|
||||
echo $PATH
|
||||
exit 1
|
||||
EOF
|
||||
alert_down = ["log_command", "log_shell"]
|
||||
alert_after = 5
|
||||
alert_every = 0
|
||||
check_interval = "1m"
|
||||
}
|
||||
@@ -1,23 +1,25 @@
|
||||
---
|
||||
check_interval: 1
|
||||
check_interval: 1s
|
||||
|
||||
monitors:
|
||||
- name: Command
|
||||
command: ['echo', '$PATH']
|
||||
alert_down: ['log_command', 'log_shell']
|
||||
command: ["echo", "$PATH"]
|
||||
alert_down: ["log_command", "log_shell"]
|
||||
alert_every: 0
|
||||
check_interval: 10s
|
||||
- name: Shell
|
||||
command_shell: >
|
||||
command: >
|
||||
echo 'Some string with stuff';
|
||||
echo 'another line';
|
||||
echo $PATH;
|
||||
exit 1
|
||||
alert_down: ['log_command', 'log_shell']
|
||||
alert_down: ["log_command", "log_shell"]
|
||||
alert_after: 5
|
||||
alert_every: 0
|
||||
check_interval: 1m
|
||||
|
||||
alerts:
|
||||
log_command:
|
||||
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
|
||||
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
|
||||
log_shell:
|
||||
command_shell: echo "Failure on {{.MonitorName}} User is $USER"
|
||||
command: echo "Failure on {{.MonitorName}} User is $USER"
|
||||
|
||||
@@ -0,0 +1,19 @@
|
||||
check_interval = "1s"
|
||||
|
||||
monitor "Shell" {
|
||||
shell_command = <<-EOF
|
||||
echo 'Some string with stuff'
|
||||
echo "<angle brackets>"
|
||||
exit 1
|
||||
EOF
|
||||
alert_down = ["log_shell"]
|
||||
alert_after = 1
|
||||
alert_every = 0
|
||||
}
|
||||
|
||||
alert "log_shell" {
|
||||
shell_command = <<EOF
|
||||
echo 'Some string with stuff'
|
||||
echo '<angle brackets>'
|
||||
EOF
|
||||
}
|
||||
@@ -1,18 +0,0 @@
|
||||
---
|
||||
check_interval: 1
|
||||
|
||||
monitors:
|
||||
- name: Shell
|
||||
command_shell: >
|
||||
echo 'Some string with stuff';
|
||||
echo "<angle brackets>";
|
||||
exit 1
|
||||
alert_down: ['log_shell']
|
||||
alert_after: 1
|
||||
alert_every: 0
|
||||
|
||||
alerts:
|
||||
log_shell:
|
||||
command_shell: |
|
||||
echo 'Some string with stuff'
|
||||
echo '<angle brackets>'
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
// ShellCommand takes a string and executes it as a command using `sh`
|
||||
func ShellCommand(command string) *exec.Cmd {
|
||||
shellCommand := []string{"sh", "-c", strings.TrimSpace(command)}
|
||||
//log.Printf("Shell command: %v", shellCommand)
|
||||
|
||||
return exec.Command(shellCommand[0], shellCommand[1:]...)
|
||||
}
|
||||
|
||||
@@ -17,10 +17,12 @@ func EqualSliceString(a, b []string) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, val := range a {
|
||||
if val != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
+10
-1
@@ -1,6 +1,9 @@
|
||||
package main
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestUtilEqualSliceString(t *testing.T) {
|
||||
cases := []struct {
|
||||
@@ -21,6 +24,11 @@ func TestUtilEqualSliceString(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
c := c
|
||||
|
||||
t.Run(fmt.Sprintf("%v %v", c.a, c.b), func(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
actual := EqualSliceString(c.a, c.b)
|
||||
if actual != c.expected {
|
||||
t.Errorf(
|
||||
@@ -28,5 +36,6 @@ func TestUtilEqualSliceString(t *testing.T) {
|
||||
c.a, c.b, c.expected, actual,
|
||||
)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user