Compare commits
35 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
7d87c3d036 | ||
|
|
deec04bf0d | ||
|
|
958446050f | ||
|
|
88e94642d9 | ||
|
|
bc83a51907 | ||
|
|
08b8932331 | ||
|
|
9072d97bb8 | ||
|
|
cdd8a69669 | ||
|
|
3c14a02770 | ||
|
|
328ea83c25 | ||
|
|
ce986e8d1d | ||
|
|
31a4b484bf | ||
|
|
49e3635819 | ||
|
|
444d060736 | ||
|
|
860c2cdf43 | ||
|
|
befea7375f | ||
|
|
04395fa693 | ||
|
|
bdf7355fa7 | ||
|
|
30c2c7d6b2 | ||
|
|
5f250f17a8 | ||
|
|
fda9e1bfc3 | ||
|
|
f0e179851f | ||
|
|
9e124803da | ||
|
|
2c4543a7bc | ||
|
|
a1b906b94a | ||
|
|
0a5be250b5 | ||
|
|
88f77aa27c | ||
|
|
67c2375bba | ||
|
|
aad9eaa32f | ||
|
|
5dc5ba5257 | ||
|
|
4aff294739 | ||
|
|
0684b15a44 | ||
|
|
d3826dacde | ||
|
|
f8e40c643c | ||
|
|
cffbbd734a |
+48
-26
@@ -3,31 +3,15 @@ kind: pipeline
|
|||||||
name: test
|
name: test
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
- name: test
|
- name: test
|
||||||
image: golang:1.12
|
image: golang:1.17
|
||||||
|
environment:
|
||||||
|
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||||
commands:
|
commands:
|
||||||
- make build
|
|
||||||
- make test
|
- make test
|
||||||
|
|
||||||
- name: check
|
- name: check
|
||||||
image: python:3
|
image: iamthefij/drone-pre-commit:personal
|
||||||
commands:
|
|
||||||
- pip install pre-commit==1.20.0
|
|
||||||
- make check
|
|
||||||
|
|
||||||
- name: notify
|
|
||||||
image: drillster/drone-email
|
|
||||||
settings:
|
|
||||||
host:
|
|
||||||
from_secret: SMTP_HOST
|
|
||||||
username:
|
|
||||||
from_secret: SMTP_USER
|
|
||||||
password:
|
|
||||||
from_secret: SMTP_PASS
|
|
||||||
from: drone@iamthefij.com
|
|
||||||
when:
|
|
||||||
status: [changed, failure]
|
|
||||||
|
|
||||||
---
|
---
|
||||||
kind: pipeline
|
kind: pipeline
|
||||||
@@ -46,10 +30,36 @@ trigger:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: build all binaries
|
- name: build all binaries
|
||||||
image: golang:1.12
|
image: golang:1.17
|
||||||
|
environment:
|
||||||
|
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||||
commands:
|
commands:
|
||||||
- make all
|
- make all
|
||||||
|
|
||||||
|
- name: compress binaries for release
|
||||||
|
image: ubuntu
|
||||||
|
commands:
|
||||||
|
- find ./dist -type f -executable -execdir tar -czvf {}.tar.gz {} \;
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
|
||||||
|
- name: upload gitea release
|
||||||
|
image: plugins/gitea-release
|
||||||
|
settings:
|
||||||
|
title: ${DRONE_TAG}
|
||||||
|
files: dist/*.tar.gz
|
||||||
|
checksum:
|
||||||
|
- md5
|
||||||
|
- sha1
|
||||||
|
- sha256
|
||||||
|
- sha512
|
||||||
|
base_url:
|
||||||
|
from_secret: gitea_base_url
|
||||||
|
api_key:
|
||||||
|
from_secret: gitea_token
|
||||||
|
when:
|
||||||
|
event: tag
|
||||||
|
|
||||||
- name: push image - arm
|
- name: push image - arm
|
||||||
image: plugins/docker
|
image: plugins/docker
|
||||||
settings:
|
settings:
|
||||||
@@ -100,15 +110,27 @@ steps:
|
|||||||
password:
|
password:
|
||||||
from_secret: docker_password
|
from_secret: docker_password
|
||||||
|
|
||||||
|
---
|
||||||
|
kind: pipeline
|
||||||
|
name: notify
|
||||||
|
|
||||||
|
depends_on:
|
||||||
|
- test
|
||||||
|
- publish
|
||||||
|
|
||||||
|
trigger:
|
||||||
|
status:
|
||||||
|
- failure
|
||||||
|
|
||||||
|
steps:
|
||||||
|
|
||||||
- name: notify
|
- name: notify
|
||||||
image: drillster/drone-email
|
image: drillster/drone-email
|
||||||
settings:
|
settings:
|
||||||
host:
|
host:
|
||||||
from_secret: SMTP_HOST
|
from_secret: SMTP_HOST # pragma: whitelist secret
|
||||||
username:
|
username:
|
||||||
from_secret: SMTP_USER
|
from_secret: SMTP_USER # pragma: whitelist secret
|
||||||
password:
|
password:
|
||||||
from_secret: SMTP_PASS
|
from_secret: SMTP_PASS # pragma: whitelist secret
|
||||||
from: drone@iamthefij.com
|
from: drone@iamthefij.com
|
||||||
when:
|
|
||||||
status: [changed, failure]
|
|
||||||
|
|||||||
Vendored
+2
-2
@@ -17,5 +17,5 @@ config.yml
|
|||||||
|
|
||||||
# Output binary
|
# Output binary
|
||||||
minitor
|
minitor
|
||||||
minitor-linux-*
|
minitor-go
|
||||||
minitor-darwin-amd64
|
dist/
|
||||||
|
|||||||
@@ -0,0 +1,36 @@
|
|||||||
|
---
|
||||||
|
linters:
|
||||||
|
enable:
|
||||||
|
- errname
|
||||||
|
- errorlint
|
||||||
|
- exhaustive
|
||||||
|
- gofumpt
|
||||||
|
- goimports
|
||||||
|
- gomnd
|
||||||
|
- goprintffuncname
|
||||||
|
- misspell
|
||||||
|
- tagliatelle
|
||||||
|
- tenv
|
||||||
|
- testpackage
|
||||||
|
- thelper
|
||||||
|
- tparallel
|
||||||
|
- unconvert
|
||||||
|
- wrapcheck
|
||||||
|
- wsl
|
||||||
|
disable:
|
||||||
|
- gochecknoglobals
|
||||||
|
|
||||||
|
linters-settings:
|
||||||
|
gosec:
|
||||||
|
excludes:
|
||||||
|
- G204
|
||||||
|
tagliatelle:
|
||||||
|
case:
|
||||||
|
rules:
|
||||||
|
yaml: snake
|
||||||
|
|
||||||
|
issues:
|
||||||
|
exclude-rules:
|
||||||
|
- path: _test\.go
|
||||||
|
linters:
|
||||||
|
- gosec
|
||||||
@@ -1,7 +1,7 @@
|
|||||||
---
|
---
|
||||||
repos:
|
repos:
|
||||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||||
rev: v2.4.0
|
rev: v4.4.0
|
||||||
hooks:
|
hooks:
|
||||||
- id: check-added-large-files
|
- id: check-added-large-files
|
||||||
- id: check-yaml
|
- id: check-yaml
|
||||||
@@ -10,10 +10,11 @@ repos:
|
|||||||
- id: trailing-whitespace
|
- id: trailing-whitespace
|
||||||
- id: end-of-file-fixer
|
- id: end-of-file-fixer
|
||||||
- id: check-merge-conflict
|
- id: check-merge-conflict
|
||||||
- repo: git://github.com/dnephin/pre-commit-golang
|
- repo: https://github.com/golangci/golangci-lint
|
||||||
rev: v0.3.5
|
rev: v1.50.1
|
||||||
hooks:
|
hooks:
|
||||||
- id: go-fmt
|
- id: golangci-lint
|
||||||
- id: go-imports
|
- repo: https://github.com/hadolint/hadolint
|
||||||
# - id: gometalinter
|
rev: v2.12.1-beta
|
||||||
# - id: golangci-lint
|
hooks:
|
||||||
|
- id: hadolint
|
||||||
|
|||||||
+3
-7
@@ -1,15 +1,11 @@
|
|||||||
ARG REPO=library
|
ARG REPO=library
|
||||||
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
|
FROM ${REPO}/alpine:3.12
|
||||||
FROM ${REPO}/alpine:3.10
|
|
||||||
|
|
||||||
# Copying all qemu files because amd64 doesn't exist and cannot condional copy
|
|
||||||
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
|
|
||||||
|
|
||||||
RUN mkdir /app
|
RUN mkdir /app
|
||||||
WORKDIR /app/
|
WORKDIR /app/
|
||||||
|
|
||||||
# Add common checking tools
|
# Add common checking tools
|
||||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
|
||||||
|
|
||||||
# Add minitor user for running as non-root
|
# Add minitor user for running as non-root
|
||||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||||
@@ -20,7 +16,7 @@ RUN chmod -R 755 /app/scripts
|
|||||||
|
|
||||||
# Copy minitor in
|
# Copy minitor in
|
||||||
ARG ARCH=amd64
|
ARG ARCH=amd64
|
||||||
COPY ./minitor-linux-${ARCH} ./minitor
|
COPY ./dist/minitor-linux-${ARCH} ./minitor
|
||||||
|
|
||||||
# Drop to non-root user
|
# Drop to non-root user
|
||||||
USER minitor
|
USER minitor
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
ARG REPO=library
|
ARG REPO=library
|
||||||
FROM golang:1.12-alpine AS builder
|
FROM golang:1.17 AS builder
|
||||||
|
|
||||||
RUN apk add --no-cache git=~2
|
|
||||||
|
|
||||||
RUN mkdir /app
|
RUN mkdir /app
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -16,7 +14,7 @@ ARG VERSION=dev
|
|||||||
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
|
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
|
||||||
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
|
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
|
||||||
|
|
||||||
FROM ${REPO}/alpine:3.10
|
FROM ${REPO}/alpine:3.12
|
||||||
RUN mkdir /app
|
RUN mkdir /app
|
||||||
WORKDIR /app/
|
WORKDIR /app/
|
||||||
|
|
||||||
@@ -24,7 +22,7 @@ WORKDIR /app/
|
|||||||
COPY --from=builder /app/minitor .
|
COPY --from=builder /app/minitor .
|
||||||
|
|
||||||
# Add common checking tools
|
# Add common checking tools
|
||||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
|
||||||
|
|
||||||
# Add minitor user for running as non-root
|
# Add minitor user for running as non-root
|
||||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||||
|
|||||||
@@ -1,36 +1,43 @@
|
|||||||
DOCKER_TAG ?= minitor-go-${USER}
|
DOCKER_TAG ?= minitor-go-${USER}
|
||||||
GIT_TAG_NAME := $(shell git tag -l --contains HEAD)
|
VERSION ?= $(shell git describe --tags --dirty)
|
||||||
GIT_SHA := $(shell git rev-parse HEAD)
|
GOFILES = *.go
|
||||||
VERSION := $(if $(GIT_TAG_NAME),$(GIT_TAG_NAME),$(GIT_SHA))
|
# Multi-arch targets are generated from this
|
||||||
|
TARGET_ALIAS = minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 minitor-darwin-amd64
|
||||||
|
TARGETS = $(addprefix dist/,$(TARGET_ALIAS))
|
||||||
|
#
|
||||||
|
# Default make target will run tests
|
||||||
|
.DEFAULT_GOAL = test
|
||||||
|
|
||||||
|
# Build all static Minitor binaries
|
||||||
.PHONY: all
|
.PHONY: all
|
||||||
all: minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64
|
all: $(TARGETS)
|
||||||
|
|
||||||
.PHONY: default
|
# Build all static Linux Minitor binaries. Used in Docker images
|
||||||
default: test
|
.PHONY: all-linux
|
||||||
|
all-linux: $(filter dist/minitor-linux-%,$(TARGETS))
|
||||||
|
|
||||||
|
# Build minitor for the current machine
|
||||||
|
minitor: $(GOFILES)
|
||||||
|
@echo Version: $(VERSION)
|
||||||
|
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
||||||
|
|
||||||
.PHONY: build
|
.PHONY: build
|
||||||
build: minitor
|
build: minitor
|
||||||
|
|
||||||
minitor:
|
# Run minitor for the current machine
|
||||||
@echo Version: $(VERSION)
|
|
||||||
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
|
||||||
|
|
||||||
.PHONY: run
|
.PHONY: run
|
||||||
run: minitor build
|
run: minitor
|
||||||
./minitor -debug
|
./minitor -debug
|
||||||
|
|
||||||
.PHONY: run-metrics
|
.PHONY: run-metrics
|
||||||
run-metrics: minitor build
|
run-metrics: minitor
|
||||||
./minitor -debug -metrics
|
./minitor -debug -metrics
|
||||||
|
|
||||||
|
# Run all tests
|
||||||
.PHONY: test
|
.PHONY: test
|
||||||
test:
|
test:
|
||||||
go test -coverprofile=coverage.out
|
go test -coverprofile=coverage.out
|
||||||
@echo
|
|
||||||
go tool cover -func=coverage.out
|
go tool cover -func=coverage.out
|
||||||
@echo
|
|
||||||
@# Check min coverage percentage
|
|
||||||
@go tool cover -func=coverage.out | awk -v target=80.0% \
|
@go tool cover -func=coverage.out | awk -v target=80.0% \
|
||||||
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
|
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
|
||||||
|
|
||||||
@@ -39,7 +46,7 @@ test:
|
|||||||
install-hooks:
|
install-hooks:
|
||||||
pre-commit install --install-hooks
|
pre-commit install --install-hooks
|
||||||
|
|
||||||
# Checks files for encryption
|
# Runs pre-commit checks on files
|
||||||
.PHONY: check
|
.PHONY: check
|
||||||
check:
|
check:
|
||||||
pre-commit run --all-files
|
pre-commit run --all-files
|
||||||
@@ -47,9 +54,8 @@ check:
|
|||||||
.PHONY: clean
|
.PHONY: clean
|
||||||
clean:
|
clean:
|
||||||
rm -f ./minitor
|
rm -f ./minitor
|
||||||
rm -f ./minitor-linux-*
|
|
||||||
rm -f ./minitor-darwin-amd64
|
|
||||||
rm -f ./coverage.out
|
rm -f ./coverage.out
|
||||||
|
rm -fr ./dist
|
||||||
|
|
||||||
.PHONY: docker-build
|
.PHONY: docker-build
|
||||||
docker-build:
|
docker-build:
|
||||||
@@ -60,35 +66,23 @@ docker-run: docker-build
|
|||||||
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG)
|
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG)
|
||||||
|
|
||||||
## Multi-arch targets
|
## Multi-arch targets
|
||||||
|
$(TARGETS): $(GOFILES)
|
||||||
|
mkdir -p ./dist
|
||||||
|
GOOS=$(word 2, $(subst -, ,$(@))) GOARCH=$(word 3, $(subst -, ,$(@))) CGO_ENABLED=0 \
|
||||||
|
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||||
|
-o $@
|
||||||
|
|
||||||
# Arch specific go build targets
|
.PHONY: $(TARGET_ALIAS)
|
||||||
minitor-darwin-amd64:
|
$(TARGET_ALIAS):
|
||||||
GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 \
|
$(MAKE) $(addprefix dist/,$@)
|
||||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
|
||||||
-o minitor-darwin-amd64
|
|
||||||
|
|
||||||
minitor-linux-amd64:
|
|
||||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
|
||||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
|
||||||
-o minitor-linux-amd64
|
|
||||||
|
|
||||||
minitor-linux-arm:
|
|
||||||
GOOS=linux GOARCH=arm CGO_ENABLED=0 \
|
|
||||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
|
||||||
-o minitor-linux-arm
|
|
||||||
|
|
||||||
minitor-linux-arm64:
|
|
||||||
GOOS=linux GOARCH=arm64 CGO_ENABLED=0 \
|
|
||||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
|
||||||
-o minitor-linux-arm64
|
|
||||||
|
|
||||||
# Arch specific docker build targets
|
# Arch specific docker build targets
|
||||||
.PHONY: docker-build-arm
|
.PHONY: docker-build-arm
|
||||||
docker-build-arm: minitor-linux-arm
|
docker-build-arm: dist/minitor-linux-arm
|
||||||
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
|
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
|
||||||
|
|
||||||
.PHONY: docker-build-arm
|
.PHONY: docker-build-arm64
|
||||||
docker-build-arm64: minitor-linux-arm64
|
docker-build-arm64: dist/minitor-linux-arm64
|
||||||
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64
|
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64
|
||||||
|
|
||||||
# Cross run on host architecture
|
# Cross run on host architecture
|
||||||
|
|||||||
@@ -54,7 +54,10 @@ The global configurations are:
|
|||||||
|
|
||||||
|key|value|
|
|key|value|
|
||||||
|---|---|
|
|---|---|
|
||||||
|`check_interval`|Maximum frequency to run checks for each monitor|
|
|`check_interval`|Maximum frequency to run checks for each monitor as a duration, e.g. 1m2s.|
|
||||||
|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified or 0.|
|
||||||
|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|
||||||
|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|
||||||
|`monitors`|List of all monitors. Detailed description below|
|
|`monitors`|List of all monitors. Detailed description below|
|
||||||
|`alerts`|List of all alerts. Detailed description below|
|
|`alerts`|List of all alerts. Detailed description below|
|
||||||
|
|
||||||
@@ -93,6 +96,7 @@ Also, when alerts are executed, they will be passed through Go's format function
|
|||||||
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|
||||||
|`{{.LastSuccess}}`|The ISO datetime of the last successful check|
|
|`{{.LastSuccess}}`|The ISO datetime of the last successful check|
|
||||||
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|
||||||
|
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
|
||||||
|
|
||||||
### Metrics
|
### Metrics
|
||||||
|
|
||||||
@@ -110,7 +114,7 @@ minitor -metrics -metrics-port 3000
|
|||||||
|
|
||||||
## Contributing
|
## Contributing
|
||||||
|
|
||||||
Whether you're looking to submit a patch or just tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
||||||
|
|
||||||
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
|
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
|
||||||
|
|
||||||
@@ -142,15 +146,25 @@ alerts:
|
|||||||
command: 'echo {{.MonitorName}}'
|
command: 'echo {{.MonitorName}}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Interval durations have changed from being an integer number of seconds to a duration string supported by Go, for example:
|
||||||
|
|
||||||
|
minitor-py:
|
||||||
|
```yaml
|
||||||
|
check_interval: 90
|
||||||
|
```
|
||||||
|
|
||||||
|
minitor-go:
|
||||||
|
```yaml
|
||||||
|
check_interval: 1m30s
|
||||||
|
```
|
||||||
|
|
||||||
For the time being, legacy configs for the Python version of Minitor should be compatible if you apply the `-py-compat` flag when running Minitor. Eventually, this flag will go away when later breaking changes are introduced.
|
For the time being, legacy configs for the Python version of Minitor should be compatible if you apply the `-py-compat` flag when running Minitor. Eventually, this flag will go away when later breaking changes are introduced.
|
||||||
|
|
||||||
## Future
|
## Future
|
||||||
|
|
||||||
Future, potentially breaking changes
|
Future, potentially breaking changes
|
||||||
|
|
||||||
- [ ] Implement leveled logging (maybe glog or logrus)
|
|
||||||
- [ ] Consider value of templating vs injecting values into Env variables
|
- [ ] Consider value of templating vs injecting values into Env variables
|
||||||
- [ ] Async checking
|
- [ ] Async checking
|
||||||
- [ ] Revisit metrics and see if they all make sense
|
- [ ] Revisit metrics and see if they all make sense
|
||||||
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging (Breaking)
|
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging (Breaking)
|
||||||
- [ ] Use durations rather than seconds checked in event loop (Potentially breaking)
|
|
||||||
|
|||||||
@@ -2,12 +2,21 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"strings"
|
"strings"
|
||||||
"text/template"
|
"text/template"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.iamthefij.com/iamthefij/slog"
|
||||||
|
)
|
||||||
|
|
||||||
|
var (
|
||||||
|
errNoTemplate = errors.New("no template")
|
||||||
|
|
||||||
|
// ErrAlertFailed indicates that an alert failed to send
|
||||||
|
ErrAlertFailed = errors.New("alert failed")
|
||||||
)
|
)
|
||||||
|
|
||||||
// Alert is a config driven mechanism for sending a notice
|
// Alert is a config driven mechanism for sending a notice
|
||||||
@@ -20,12 +29,12 @@ type Alert struct {
|
|||||||
|
|
||||||
// AlertNotice captures the context for an alert to be sent
|
// AlertNotice captures the context for an alert to be sent
|
||||||
type AlertNotice struct {
|
type AlertNotice struct {
|
||||||
MonitorName string
|
|
||||||
AlertCount int16
|
AlertCount int16
|
||||||
FailureCount int16
|
FailureCount int16
|
||||||
LastCheckOutput string
|
|
||||||
LastSuccess time.Time
|
|
||||||
IsUp bool
|
IsUp bool
|
||||||
|
LastSuccess time.Time
|
||||||
|
MonitorName string
|
||||||
|
LastCheckOutput string
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid returns a boolean indicating if the Alert has been correctly
|
// IsValid returns a boolean indicating if the Alert has been correctly
|
||||||
@@ -45,29 +54,33 @@ func (alert *Alert) BuildTemplates() error {
|
|||||||
"{last_success}", "{{.LastSuccess}}",
|
"{last_success}", "{{.LastSuccess}}",
|
||||||
"{monitor_name}", "{{.MonitorName}}",
|
"{monitor_name}", "{{.MonitorName}}",
|
||||||
)
|
)
|
||||||
if LogDebug {
|
|
||||||
log.Printf("DEBUG: Building template for alert %s", alert.Name)
|
slog.Debugf("Building template for alert %s", alert.Name)
|
||||||
}
|
|
||||||
if alert.commandTemplate == nil && alert.Command.Command != nil {
|
switch {
|
||||||
|
case alert.commandTemplate == nil && alert.Command.Command != nil:
|
||||||
alert.commandTemplate = []*template.Template{}
|
alert.commandTemplate = []*template.Template{}
|
||||||
for i, cmdPart := range alert.Command.Command {
|
for i, cmdPart := range alert.Command.Command {
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
cmdPart = legacy.Replace(cmdPart)
|
cmdPart = legacy.Replace(cmdPart)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
||||||
template.New(alert.Name+string(i)).Parse(cmdPart),
|
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" {
|
case alert.commandShellTemplate == nil && alert.Command.ShellCommand != "":
|
||||||
shellCmd := alert.Command.ShellCommand
|
shellCmd := alert.Command.ShellCommand
|
||||||
|
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
shellCmd = legacy.Replace(shellCmd)
|
shellCmd = legacy.Replace(shellCmd)
|
||||||
}
|
}
|
||||||
|
|
||||||
alert.commandShellTemplate = template.Must(
|
alert.commandShellTemplate = template.Must(
|
||||||
template.New(alert.Name).Parse(shellCmd),
|
template.New(alert.Name).Parse(shellCmd),
|
||||||
)
|
)
|
||||||
} else {
|
default:
|
||||||
return fmt.Errorf("No template provided for alert %s", alert.Name)
|
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
@@ -75,30 +88,40 @@ func (alert *Alert) BuildTemplates() error {
|
|||||||
|
|
||||||
// Send will send an alert notice by executing the command template
|
// Send will send an alert notice by executing the command template
|
||||||
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||||
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
|
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||||
|
|
||||||
var cmd *exec.Cmd
|
var cmd *exec.Cmd
|
||||||
if alert.commandTemplate != nil {
|
|
||||||
|
switch {
|
||||||
|
case alert.commandTemplate != nil:
|
||||||
command := []string{}
|
command := []string{}
|
||||||
|
|
||||||
for _, cmdTmp := range alert.commandTemplate {
|
for _, cmdTmp := range alert.commandTemplate {
|
||||||
var commandBuffer bytes.Buffer
|
var commandBuffer bytes.Buffer
|
||||||
|
|
||||||
err = cmdTmp.Execute(&commandBuffer, notice)
|
err = cmdTmp.Execute(&commandBuffer, notice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
command = append(command, commandBuffer.String())
|
command = append(command, commandBuffer.String())
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd = exec.Command(command[0], command[1:]...)
|
cmd = exec.Command(command[0], command[1:]...)
|
||||||
} else if alert.commandShellTemplate != nil {
|
case alert.commandShellTemplate != nil:
|
||||||
var commandBuffer bytes.Buffer
|
var commandBuffer bytes.Buffer
|
||||||
|
|
||||||
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
shellCommand := commandBuffer.String()
|
shellCommand := commandBuffer.String()
|
||||||
|
|
||||||
cmd = ShellCommand(shellCommand)
|
cmd = ShellCommand(shellCommand)
|
||||||
} else {
|
default:
|
||||||
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
|
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -110,8 +133,15 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
|||||||
var output []byte
|
var output []byte
|
||||||
output, err = cmd.CombinedOutput()
|
output, err = cmd.CombinedOutput()
|
||||||
outputStr = string(output)
|
outputStr = string(output)
|
||||||
if LogDebug {
|
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||||
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf(
|
||||||
|
"Alert '%s' failed to send. Returned %v: %w",
|
||||||
|
alert.Name,
|
||||||
|
err,
|
||||||
|
ErrAlertFailed,
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
return outputStr, err
|
return outputStr, err
|
||||||
@@ -124,7 +154,7 @@ func NewLogAlert() *Alert {
|
|||||||
Command: CommandOrShell{
|
Command: CommandOrShell{
|
||||||
Command: []string{
|
Command: []string{
|
||||||
"echo",
|
"echo",
|
||||||
"{{.MonitorName}} check has failed {{.FailureCount}} times",
|
"{{.MonitorName}} {{if .IsUp}}has recovered{{else}}check has failed {{.FailureCount}} times{{end}}",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
+34
-1
@@ -18,11 +18,13 @@ func TestAlertIsValid(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.alert.IsValid()
|
actual := c.alert.IsValid()
|
||||||
if actual != c.expected {
|
if actual != c.expected {
|
||||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -76,25 +78,52 @@ func TestAlertSend(t *testing.T) {
|
|||||||
"Command shell with legacy template",
|
"Command shell with legacy template",
|
||||||
true,
|
true,
|
||||||
},
|
},
|
||||||
|
// Test default log alert down
|
||||||
|
{
|
||||||
|
*NewLogAlert(),
|
||||||
|
AlertNotice{MonitorName: "Test", FailureCount: 1, IsUp: false},
|
||||||
|
"Test check has failed 1 times\n",
|
||||||
|
false,
|
||||||
|
"Default log alert down",
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
// Test default log alert up
|
||||||
|
{
|
||||||
|
*NewLogAlert(),
|
||||||
|
AlertNotice{MonitorName: "Test", IsUp: true},
|
||||||
|
"Test has recovered\n",
|
||||||
|
false,
|
||||||
|
"Default log alert up",
|
||||||
|
false,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
// Set PyCompat to value of compat flag
|
// Set PyCompat to value of compat flag
|
||||||
PyCompat = c.pyCompat
|
PyCompat = c.pyCompat
|
||||||
c.alert.BuildTemplates()
|
|
||||||
|
err := c.alert.BuildTemplates()
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
|
||||||
|
}
|
||||||
|
|
||||||
output, err := c.alert.Send(c.notice)
|
output, err := c.alert.Send(c.notice)
|
||||||
hasErr := (err != nil)
|
hasErr := (err != nil)
|
||||||
|
|
||||||
if output != c.expectedOutput {
|
if output != c.expectedOutput {
|
||||||
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
|
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
if hasErr != c.expectErr {
|
if hasErr != c.expectErr {
|
||||||
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
|
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set PyCompat back to default value
|
// Set PyCompat back to default value
|
||||||
PyCompat = false
|
PyCompat = false
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -102,10 +131,12 @@ func TestAlertSend(t *testing.T) {
|
|||||||
func TestAlertSendNoTemplates(t *testing.T) {
|
func TestAlertSendNoTemplates(t *testing.T) {
|
||||||
alert := Alert{}
|
alert := Alert{}
|
||||||
notice := AlertNotice{}
|
notice := AlertNotice{}
|
||||||
|
|
||||||
output, err := alert.Send(notice)
|
output, err := alert.Send(notice)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
|
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -124,10 +155,12 @@ func TestAlertBuildTemplate(t *testing.T) {
|
|||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
err := c.alert.BuildTemplates()
|
err := c.alert.BuildTemplates()
|
||||||
hasErr := (err != nil)
|
hasErr := (err != nil)
|
||||||
|
|
||||||
if hasErr != c.expectErr {
|
if hasErr != c.expectErr {
|
||||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
|
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -3,16 +3,23 @@ package main
|
|||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"log"
|
"time"
|
||||||
|
|
||||||
|
"git.iamthefij.com/iamthefij/slog"
|
||||||
"gopkg.in/yaml.v2"
|
"gopkg.in/yaml.v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
var errInvalidConfig = errors.New("Invalid configuration")
|
||||||
|
|
||||||
// Config type contains all provided user configuration
|
// Config type contains all provided user configuration
|
||||||
type Config struct {
|
type Config struct {
|
||||||
CheckInterval int64 `yaml:"check_interval"`
|
CheckInterval SecondsOrDuration `yaml:"check_interval"`
|
||||||
Monitors []*Monitor
|
DefaultAlertAfter int16 `yaml:"default_alert_after"`
|
||||||
Alerts map[string]*Alert
|
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
|
||||||
|
DefaultAlertDown []string `yaml:"default_alert_down"`
|
||||||
|
DefaultAlertUp []string `yaml:"default_alert_up"`
|
||||||
|
Monitors []*Monitor
|
||||||
|
Alerts map[string]*Alert
|
||||||
}
|
}
|
||||||
|
|
||||||
// CommandOrShell type wraps a string or list of strings
|
// CommandOrShell type wraps a string or list of strings
|
||||||
@@ -35,17 +42,48 @@ func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) erro
|
|||||||
// Error indicates this is shell command
|
// Error indicates this is shell command
|
||||||
if err != nil {
|
if err != nil {
|
||||||
var shellCmd string
|
var shellCmd string
|
||||||
|
|
||||||
err := unmarshal(&shellCmd)
|
err := unmarshal(&shellCmd)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
cos.ShellCommand = shellCmd
|
cos.ShellCommand = shellCmd
|
||||||
} else {
|
} else {
|
||||||
cos.Command = cmd
|
cos.Command = cmd
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SecondsOrDuration wraps a duration value for parsing a duration or seconds from YAML
|
||||||
|
// NOTE: This should be removed in favor of only parsing durations once compatibility is broken
|
||||||
|
type SecondsOrDuration struct {
|
||||||
|
value time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// Value returns a duration value
|
||||||
|
func (sod SecondsOrDuration) Value() time.Duration {
|
||||||
|
return sod.value
|
||||||
|
}
|
||||||
|
|
||||||
|
// UnmarshalYAML allows unmarshalling a duration value or seconds if an int was provided
|
||||||
|
func (sod *SecondsOrDuration) UnmarshalYAML(unmarshal func(interface{}) error) error {
|
||||||
|
var seconds int64
|
||||||
|
err := unmarshal(&seconds)
|
||||||
|
|
||||||
|
if err == nil {
|
||||||
|
sod.value = time.Second * time.Duration(seconds)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error indicates that we don't have an int
|
||||||
|
err = unmarshal(&sod.value)
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// IsValid checks config validity and returns true if valid
|
// IsValid checks config validity and returns true if valid
|
||||||
func (config Config) IsValid() (isValid bool) {
|
func (config Config) IsValid() (isValid bool) {
|
||||||
isValid = true
|
isValid = true
|
||||||
@@ -53,47 +91,75 @@ func (config Config) IsValid() (isValid bool) {
|
|||||||
// Validate alerts
|
// Validate alerts
|
||||||
if config.Alerts == nil || len(config.Alerts) == 0 {
|
if config.Alerts == nil || len(config.Alerts) == 0 {
|
||||||
// This should never happen because there is a default alert named 'log' for now
|
// This should never happen because there is a default alert named 'log' for now
|
||||||
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
|
slog.Errorf("Invalid alert configuration: Must provide at least one alert")
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, alert := range config.Alerts {
|
for _, alert := range config.Alerts {
|
||||||
if !alert.IsValid() {
|
if !alert.IsValid() {
|
||||||
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
|
slog.Errorf("Invalid alert configuration: %+v", alert.Name)
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
|
} else {
|
||||||
|
slog.Debugf("Loaded alert %s", alert.Name)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Validate monitors
|
// Validate monitors
|
||||||
if config.Monitors == nil || len(config.Monitors) == 0 {
|
if config.Monitors == nil || len(config.Monitors) == 0 {
|
||||||
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
|
slog.Errorf("Invalid monitor configuration: Must provide at least one monitor")
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, monitor := range config.Monitors {
|
for _, monitor := range config.Monitors {
|
||||||
if !monitor.IsValid() {
|
if !monitor.IsValid() {
|
||||||
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
|
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
// Check that all Monitor alerts actually exist
|
// Check that all Monitor alerts actually exist
|
||||||
for _, isUp := range []bool{true, false} {
|
for _, isUp := range []bool{true, false} {
|
||||||
for _, alertName := range monitor.GetAlertNames(isUp) {
|
for _, alertName := range monitor.GetAlertNames(isUp) {
|
||||||
if _, ok := config.Alerts[alertName]; !ok {
|
if _, ok := config.Alerts[alertName]; !ok {
|
||||||
log.Printf(
|
slog.Errorf(
|
||||||
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
|
"Invalid monitor configuration: %s. Unknown alert %s",
|
||||||
monitor.Name, alertName,
|
monitor.Name, alertName,
|
||||||
)
|
)
|
||||||
|
|
||||||
isValid = false
|
isValid = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return isValid
|
||||||
}
|
}
|
||||||
|
|
||||||
// Init performs extra initialization on top of loading the config from file
|
// Init performs extra initialization on top of loading the config from file
|
||||||
func (config *Config) Init() (err error) {
|
func (config *Config) Init() (err error) {
|
||||||
|
for _, monitor := range config.Monitors {
|
||||||
|
if monitor.AlertAfter == 0 && config.DefaultAlertAfter > 0 {
|
||||||
|
monitor.AlertAfter = config.DefaultAlertAfter
|
||||||
|
}
|
||||||
|
|
||||||
|
if monitor.AlertEvery == nil && config.DefaultAlertEvery != nil {
|
||||||
|
monitor.AlertEvery = config.DefaultAlertEvery
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(monitor.AlertDown) == 0 && len(config.DefaultAlertDown) > 0 {
|
||||||
|
monitor.AlertDown = config.DefaultAlertDown
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(monitor.AlertUp) == 0 && len(config.DefaultAlertUp) > 0 {
|
||||||
|
monitor.AlertUp = config.DefaultAlertUp
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for name, alert := range config.Alerts {
|
for name, alert := range config.Alerts {
|
||||||
alert.Name = name
|
alert.Name = name
|
||||||
|
|
||||||
if err = alert.BuildTemplates(); err != nil {
|
if err = alert.BuildTemplates(); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
@@ -114,28 +180,30 @@ func LoadConfig(filePath string) (config Config, err error) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if LogDebug {
|
slog.Debugf("Config values:\n%v\n", config)
|
||||||
log.Printf("DEBUG: Config values:\n%v\n", config)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add log alert if not present
|
// Add log alert if not present
|
||||||
if PyCompat {
|
if PyCompat {
|
||||||
// Intialize alerts list if not present
|
// Initialize alerts list if not present
|
||||||
if config.Alerts == nil {
|
if config.Alerts == nil {
|
||||||
config.Alerts = map[string]*Alert{}
|
config.Alerts = map[string]*Alert{}
|
||||||
}
|
}
|
||||||
|
|
||||||
if _, ok := config.Alerts["log"]; !ok {
|
if _, ok := config.Alerts["log"]; !ok {
|
||||||
config.Alerts["log"] = NewLogAlert()
|
config.Alerts["log"] = NewLogAlert()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !config.IsValid() {
|
// Finish initializing configuration
|
||||||
err = errors.New("Invalid configuration")
|
if err = config.Init(); err != nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish initializing configuration
|
if !config.IsValid() {
|
||||||
err = config.Init()
|
err = errInvalidConfig
|
||||||
|
|
||||||
return
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
return config, err
|
||||||
}
|
}
|
||||||
|
|||||||
+42
-1
@@ -3,6 +3,7 @@ package main
|
|||||||
import (
|
import (
|
||||||
"log"
|
"log"
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestLoadConfig(t *testing.T) {
|
func TestLoadConfig(t *testing.T) {
|
||||||
@@ -13,6 +14,7 @@ func TestLoadConfig(t *testing.T) {
|
|||||||
pyCompat bool
|
pyCompat bool
|
||||||
}{
|
}{
|
||||||
{"./test/valid-config.yml", false, "Valid config file", false},
|
{"./test/valid-config.yml", false, "Valid config file", false},
|
||||||
|
{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
|
||||||
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
|
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
|
||||||
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
|
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
|
||||||
{"./test/does-not-exist", true, "Invalid config path", false},
|
{"./test/does-not-exist", true, "Invalid config path", false},
|
||||||
@@ -27,20 +29,50 @@ func TestLoadConfig(t *testing.T) {
|
|||||||
PyCompat = c.pyCompat
|
PyCompat = c.pyCompat
|
||||||
_, err := LoadConfig(c.configPath)
|
_, err := LoadConfig(c.configPath)
|
||||||
hasErr := (err != nil)
|
hasErr := (err != nil)
|
||||||
|
|
||||||
if hasErr != c.expectErr {
|
if hasErr != c.expectErr {
|
||||||
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set PyCompat to default value
|
// Set PyCompat to default value
|
||||||
PyCompat = false
|
PyCompat = false
|
||||||
log.Println("-----")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestIntervalParsing(t *testing.T) {
|
||||||
|
log.Printf("Testing case TestIntervalParsing")
|
||||||
|
|
||||||
|
config, err := LoadConfig("./test/valid-config.yml")
|
||||||
|
if err != nil {
|
||||||
|
t.Errorf("Failed loading config: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
oneSecond := time.Second
|
||||||
|
tenSeconds := 10 * time.Second
|
||||||
|
oneMinute := time.Minute
|
||||||
|
|
||||||
|
// validate top level interval seconds represented as an int
|
||||||
|
if config.CheckInterval.Value() != oneSecond {
|
||||||
|
t.Errorf("Incorrectly parsed int seconds. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Monitors[0].CheckInterval.Value() != tenSeconds {
|
||||||
|
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||||
|
}
|
||||||
|
|
||||||
|
if config.Monitors[1].CheckInterval.Value() != oneMinute {
|
||||||
|
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Println("-----")
|
||||||
|
}
|
||||||
|
|
||||||
// TestMultiLineConfig is a more complicated test stepping through the parsing
|
// TestMultiLineConfig is a more complicated test stepping through the parsing
|
||||||
// and execution of mutli-line strings presented in YAML
|
// and execution of mutli-line strings presented in YAML
|
||||||
func TestMultiLineConfig(t *testing.T) {
|
func TestMultiLineConfig(t *testing.T) {
|
||||||
log.Println("Testing multi-line string config")
|
log.Println("Testing multi-line string config")
|
||||||
|
|
||||||
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
||||||
@@ -48,8 +80,10 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(parse > string)")
|
log.Println("TestMultiLineConfig(parse > string)")
|
||||||
|
|
||||||
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
||||||
actual := config.Monitors[0].Command.ShellCommand
|
actual := config.Monitors[0].Command.ShellCommand
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(>) failed")
|
t.Errorf("TestMultiLineConfig(>) failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@@ -60,12 +94,15 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(execute > string)")
|
log.Println("TestMultiLineConfig(execute > string)")
|
||||||
|
|
||||||
_, notice := config.Monitors[0].Check()
|
_, notice := config.Monitors[0].Check()
|
||||||
if notice == nil {
|
if notice == nil {
|
||||||
t.Fatalf("Did not receive an alert notice")
|
t.Fatalf("Did not receive an alert notice")
|
||||||
}
|
}
|
||||||
|
|
||||||
expected = "Some string with stuff\n<angle brackets>\n"
|
expected = "Some string with stuff\n<angle brackets>\n"
|
||||||
actual = notice.LastCheckOutput
|
actual = notice.LastCheckOutput
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@@ -76,8 +113,10 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(parse | string)")
|
log.Println("TestMultiLineConfig(parse | string)")
|
||||||
|
|
||||||
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
||||||
actual = config.Alerts["log_shell"].Command.ShellCommand
|
actual = config.Alerts["log_shell"].Command.ShellCommand
|
||||||
|
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(|) failed")
|
t.Errorf("TestMultiLineConfig(|) failed")
|
||||||
t.Logf("string expected=`%v`", expected)
|
t.Logf("string expected=`%v`", expected)
|
||||||
@@ -88,10 +127,12 @@ func TestMultiLineConfig(t *testing.T) {
|
|||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
log.Println("TestMultiLineConfig(execute | string)")
|
log.Println("TestMultiLineConfig(execute | string)")
|
||||||
|
|
||||||
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Errorf("Execution of alert failed")
|
t.Errorf("Execution of alert failed")
|
||||||
}
|
}
|
||||||
|
|
||||||
expected = "Some string with stuff\n<angle brackets>\n"
|
expected = "Some string with stuff\n<angle brackets>\n"
|
||||||
if expected != actual {
|
if expected != actual {
|
||||||
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
module git.iamthefij.com/iamthefij/minitor-go
|
module git.iamthefij.com/iamthefij/minitor-go
|
||||||
|
|
||||||
go 1.12
|
go 1.15
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
git.iamthefij.com/iamthefij/slog v1.3.0
|
||||||
github.com/prometheus/client_golang v1.2.1
|
github.com/prometheus/client_golang v1.2.1
|
||||||
gopkg.in/yaml.v2 v2.2.4
|
gopkg.in/yaml.v2 v2.2.4
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -1,3 +1,5 @@
|
|||||||
|
git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
|
||||||
|
git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
|
||||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||||
|
|||||||
@@ -1,16 +1,15 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"errors"
|
||||||
"flag"
|
"flag"
|
||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.iamthefij.com/iamthefij/slog"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
// LogDebug will control whether debug messsages should be logged
|
|
||||||
LogDebug = false
|
|
||||||
|
|
||||||
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
||||||
ExportMetrics = false
|
ExportMetrics = false
|
||||||
// MetricsPort is the port to expose metrics on
|
// MetricsPort is the port to expose metrics on
|
||||||
@@ -23,57 +22,67 @@ var (
|
|||||||
|
|
||||||
// version of minitor being run
|
// version of minitor being run
|
||||||
version = "dev"
|
version = "dev"
|
||||||
|
|
||||||
|
errUnknownAlert = errors.New("unknown alert")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
|
||||||
|
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
|
||||||
|
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
||||||
|
|
||||||
|
if alertNames == nil {
|
||||||
|
// This should only happen for a recovery alert. AlertDown is validated not empty
|
||||||
|
slog.Warningf(
|
||||||
|
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||||
|
alertNotice.MonitorName, alertNotice.IsUp,
|
||||||
|
)
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, alertName := range alertNames {
|
||||||
|
if alert, ok := config.Alerts[alertName]; ok {
|
||||||
|
output, err := alert.Send(*alertNotice)
|
||||||
|
if err != nil {
|
||||||
|
slog.Errorf(
|
||||||
|
"Alert '%s' failed. result=%v: output=%s",
|
||||||
|
alert.Name,
|
||||||
|
err,
|
||||||
|
output,
|
||||||
|
)
|
||||||
|
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Count alert metrics
|
||||||
|
Metrics.CountAlert(monitor.Name, alert.Name)
|
||||||
|
} else {
|
||||||
|
// This case should never actually happen since we validate against it
|
||||||
|
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||||
|
|
||||||
|
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func checkMonitors(config *Config) error {
|
func checkMonitors(config *Config) error {
|
||||||
|
// TODO: Run this in goroutines and capture exceptions
|
||||||
for _, monitor := range config.Monitors {
|
for _, monitor := range config.Monitors {
|
||||||
if monitor.ShouldCheck() {
|
if monitor.ShouldCheck() {
|
||||||
success, alertNotice := monitor.Check()
|
success, alertNotice := monitor.Check()
|
||||||
|
|
||||||
hasAlert := alertNotice != nil
|
hasAlert := alertNotice != nil
|
||||||
|
|
||||||
// Track status metrics
|
// Track status metrics
|
||||||
Metrics.SetMonitorStatus(monitor.Name, success)
|
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
|
||||||
|
|
||||||
// Should probably consider refactoring everything below here
|
|
||||||
if alertNotice != nil {
|
if alertNotice != nil {
|
||||||
if LogDebug {
|
err := sendAlerts(config, monitor, alertNotice)
|
||||||
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
|
// If there was an error in sending an alert, exit early and bubble it up
|
||||||
}
|
if err != nil {
|
||||||
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
return err
|
||||||
if alertNames == nil {
|
|
||||||
// This should only happen for a recovery alert. AlertDown is validated not empty
|
|
||||||
log.Printf(
|
|
||||||
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
|
||||||
alertNotice.MonitorName, alertNotice.IsUp,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
for _, alertName := range alertNames {
|
|
||||||
if alert, ok := config.Alerts[alertName]; ok {
|
|
||||||
output, err := alert.Send(*alertNotice)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf(
|
|
||||||
"ERROR: Alert '%s' failed. result=%v: output=%s",
|
|
||||||
alert.Name,
|
|
||||||
err,
|
|
||||||
output,
|
|
||||||
)
|
|
||||||
return fmt.Errorf(
|
|
||||||
"Unsuccessfully triggered alert '%s'. "+
|
|
||||||
"Crashing to avoid false negatives: %v",
|
|
||||||
alert.Name,
|
|
||||||
err,
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Count alert metrics
|
|
||||||
Metrics.CountAlert(monitor.Name, alert.Name)
|
|
||||||
} else {
|
|
||||||
// This case should never actually happen since we validate against it
|
|
||||||
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
|
||||||
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -83,41 +92,38 @@ func checkMonitors(config *Config) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
// Get debug flag
|
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
|
||||||
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
|
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
||||||
|
|
||||||
|
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
|
||||||
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
||||||
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
|
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
|
||||||
flag.IntVar(&MetricsPort, "metrics-port", 8080, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
|
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
|
||||||
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
|
|
||||||
var configPath = flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
|
||||||
flag.Parse()
|
flag.Parse()
|
||||||
|
|
||||||
// Print version if flag is provided
|
// Print version if flag is provided
|
||||||
if *showVersion {
|
if *showVersion {
|
||||||
log.Println("Minitor version:", version)
|
fmt.Println("Minitor version:", version)
|
||||||
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Load configuration
|
// Load configuration
|
||||||
config, err := LoadConfig(*configPath)
|
config, err := LoadConfig(*configPath)
|
||||||
if err != nil {
|
slog.OnErrFatalf(err, "Error loading config: %v", err)
|
||||||
log.Fatalf("Error loading config: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Serve metrics exporter, if specified
|
// Serve metrics exporter, if specified
|
||||||
if ExportMetrics {
|
if ExportMetrics {
|
||||||
log.Println("INFO: Exporting metrics to Prometheus")
|
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
|
||||||
|
|
||||||
go ServeMetrics()
|
go ServeMetrics()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start main loop
|
// Start main loop
|
||||||
for {
|
for {
|
||||||
err = checkMonitors(&config)
|
err = checkMonitors(&config)
|
||||||
if err != nil {
|
slog.OnErrPanicf(err, "Error checking monitors")
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
sleepTime := time.Duration(config.CheckInterval) * time.Second
|
time.Sleep(config.CheckInterval.Value())
|
||||||
time.Sleep(sleepTime)
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+44
-22
@@ -16,7 +16,7 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
{
|
{
|
||||||
config: Config{
|
config: Config{
|
||||||
Monitors: []*Monitor{
|
Monitors: []*Monitor{
|
||||||
&Monitor{
|
{
|
||||||
Name: "Success",
|
Name: "Success",
|
||||||
Command: CommandOrShell{Command: []string{"true"}},
|
Command: CommandOrShell{Command: []string{"true"}},
|
||||||
},
|
},
|
||||||
@@ -28,36 +28,24 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
{
|
{
|
||||||
config: Config{
|
config: Config{
|
||||||
Monitors: []*Monitor{
|
Monitors: []*Monitor{
|
||||||
&Monitor{
|
{
|
||||||
Name: "Failure",
|
Name: "Failure",
|
||||||
Command: CommandOrShell{Command: []string{"false"}},
|
Command: CommandOrShell{Command: []string{"false"}},
|
||||||
AlertAfter: 1,
|
AlertAfter: 1,
|
||||||
},
|
},
|
||||||
&Monitor{
|
|
||||||
Name: "Failure",
|
|
||||||
Command: CommandOrShell{Command: []string{"false"}},
|
|
||||||
AlertDown: []string{"unknown"},
|
|
||||||
AlertAfter: 1,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectErr: false,
|
expectErr: false,
|
||||||
name: "Monitor failure, no and unknown alerts",
|
name: "Monitor failure, no alerts",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
config: Config{
|
config: Config{
|
||||||
Monitors: []*Monitor{
|
Monitors: []*Monitor{
|
||||||
&Monitor{
|
{
|
||||||
Name: "Success",
|
Name: "Success",
|
||||||
Command: CommandOrShell{Command: []string{"ls"}},
|
Command: CommandOrShell{Command: []string{"ls"}},
|
||||||
alertCount: 1,
|
alertCount: 1,
|
||||||
},
|
},
|
||||||
&Monitor{
|
|
||||||
Name: "Success",
|
|
||||||
Command: CommandOrShell{Command: []string{"true"}},
|
|
||||||
AlertUp: []string{"unknown"},
|
|
||||||
alertCount: 1,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectErr: false,
|
expectErr: false,
|
||||||
@@ -66,7 +54,35 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
{
|
{
|
||||||
config: Config{
|
config: Config{
|
||||||
Monitors: []*Monitor{
|
Monitors: []*Monitor{
|
||||||
&Monitor{
|
{
|
||||||
|
Name: "Failure",
|
||||||
|
Command: CommandOrShell{Command: []string{"false"}},
|
||||||
|
AlertDown: []string{"unknown"},
|
||||||
|
AlertAfter: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectErr: true,
|
||||||
|
name: "Monitor failure, unknown alerts",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
config: Config{
|
||||||
|
Monitors: []*Monitor{
|
||||||
|
{
|
||||||
|
Name: "Success",
|
||||||
|
Command: CommandOrShell{Command: []string{"true"}},
|
||||||
|
AlertUp: []string{"unknown"},
|
||||||
|
alertCount: 1,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectErr: true,
|
||||||
|
name: "Monitor recovery, unknown alerts",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
config: Config{
|
||||||
|
Monitors: []*Monitor{
|
||||||
|
{
|
||||||
Name: "Failure",
|
Name: "Failure",
|
||||||
Command: CommandOrShell{Command: []string{"false"}},
|
Command: CommandOrShell{Command: []string{"false"}},
|
||||||
AlertDown: []string{"good"},
|
AlertDown: []string{"good"},
|
||||||
@@ -74,7 +90,7 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
Alerts: map[string]*Alert{
|
Alerts: map[string]*Alert{
|
||||||
"good": &Alert{
|
"good": {
|
||||||
Command: CommandOrShell{Command: []string{"true"}},
|
Command: CommandOrShell{Command: []string{"true"}},
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
@@ -85,7 +101,7 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
{
|
{
|
||||||
config: Config{
|
config: Config{
|
||||||
Monitors: []*Monitor{
|
Monitors: []*Monitor{
|
||||||
&Monitor{
|
{
|
||||||
Name: "Failure",
|
Name: "Failure",
|
||||||
Command: CommandOrShell{Command: []string{"false"}},
|
Command: CommandOrShell{Command: []string{"false"}},
|
||||||
AlertDown: []string{"bad"},
|
AlertDown: []string{"bad"},
|
||||||
@@ -93,7 +109,7 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
Alerts: map[string]*Alert{
|
Alerts: map[string]*Alert{
|
||||||
"bad": &Alert{
|
"bad": {
|
||||||
Name: "bad",
|
Name: "bad",
|
||||||
Command: CommandOrShell{Command: []string{"false"}},
|
Command: CommandOrShell{Command: []string{"false"}},
|
||||||
},
|
},
|
||||||
@@ -105,10 +121,16 @@ func TestCheckMonitors(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
c.config.Init()
|
err := c.config.Init()
|
||||||
err := checkMonitors(&c.config)
|
if err != nil {
|
||||||
|
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = checkMonitors(&c.config)
|
||||||
if err == nil && c.expectErr {
|
if err == nil && c.expectErr {
|
||||||
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
|
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
|
||||||
|
} else if err != nil && !c.expectErr {
|
||||||
|
t.Errorf("checkMonitors(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+17
-1
@@ -19,6 +19,7 @@ import (
|
|||||||
type MinitorMetrics struct {
|
type MinitorMetrics struct {
|
||||||
alertCount *prometheus.CounterVec
|
alertCount *prometheus.CounterVec
|
||||||
checkCount *prometheus.CounterVec
|
checkCount *prometheus.CounterVec
|
||||||
|
checkTime *prometheus.GaugeVec
|
||||||
monitorStatus *prometheus.GaugeVec
|
monitorStatus *prometheus.GaugeVec
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
|
|||||||
},
|
},
|
||||||
[]string{"monitor", "status", "is_alert"},
|
[]string{"monitor", "status", "is_alert"},
|
||||||
),
|
),
|
||||||
|
checkTime: prometheus.NewGaugeVec(
|
||||||
|
prometheus.GaugeOpts{
|
||||||
|
Name: "minitor_check_milliseconds",
|
||||||
|
Help: "Time in miliseconds that a check ran for",
|
||||||
|
},
|
||||||
|
[]string{"monitor", "status"},
|
||||||
|
),
|
||||||
monitorStatus: prometheus.NewGaugeVec(
|
monitorStatus: prometheus.NewGaugeVec(
|
||||||
prometheus.GaugeOpts{
|
prometheus.GaugeOpts{
|
||||||
Name: "minitor_monitor_up_count",
|
Name: "minitor_monitor_up_count",
|
||||||
@@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
|
|||||||
// Register newly created metrics
|
// Register newly created metrics
|
||||||
prometheus.MustRegister(metrics.alertCount)
|
prometheus.MustRegister(metrics.alertCount)
|
||||||
prometheus.MustRegister(metrics.checkCount)
|
prometheus.MustRegister(metrics.checkCount)
|
||||||
|
prometheus.MustRegister(metrics.checkTime)
|
||||||
prometheus.MustRegister(metrics.monitorStatus)
|
prometheus.MustRegister(metrics.monitorStatus)
|
||||||
|
|
||||||
return metrics
|
return metrics
|
||||||
@@ -63,11 +72,12 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
|||||||
if isUp {
|
if isUp {
|
||||||
val = 1.0
|
val = 1.0
|
||||||
}
|
}
|
||||||
|
|
||||||
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
||||||
}
|
}
|
||||||
|
|
||||||
// CountCheck counts the result of a particular Monitor check
|
// CountCheck counts the result of a particular Monitor check
|
||||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
|
||||||
status := "failure"
|
status := "failure"
|
||||||
if isSuccess {
|
if isSuccess {
|
||||||
status = "success"
|
status = "success"
|
||||||
@@ -81,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
|
|||||||
metrics.checkCount.With(
|
metrics.checkCount.With(
|
||||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||||
).Inc()
|
).Inc()
|
||||||
|
|
||||||
|
metrics.checkTime.With(
|
||||||
|
prometheus.Labels{"monitor": monitor, "status": status},
|
||||||
|
).Set(float64(ms))
|
||||||
}
|
}
|
||||||
|
|
||||||
// CountAlert counts an alert
|
// CountAlert counts an alert
|
||||||
@@ -96,6 +110,8 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
|
|||||||
// ServeMetrics starts an http server with a Prometheus metrics handler
|
// ServeMetrics starts an http server with a Prometheus metrics handler
|
||||||
func ServeMetrics() {
|
func ServeMetrics() {
|
||||||
http.Handle("/metrics", promhttp.Handler())
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
host := fmt.Sprintf(":%d", MetricsPort)
|
host := fmt.Sprintf(":%d", MetricsPort)
|
||||||
|
|
||||||
_ = http.ListenAndServe(host, nil)
|
_ = http.ListenAndServe(host, nil)
|
||||||
}
|
}
|
||||||
|
|||||||
+53
-43
@@ -1,28 +1,31 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"log"
|
|
||||||
"math"
|
"math"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"git.iamthefij.com/iamthefij/slog"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Monitor represents a particular periodic check of a command
|
// Monitor represents a particular periodic check of a command
|
||||||
type Monitor struct {
|
type Monitor struct { //nolint:maligned
|
||||||
// Config values
|
// Config values
|
||||||
|
AlertAfter int16 `yaml:"alert_after"`
|
||||||
|
AlertEvery *int16 `yaml:"alert_every"`
|
||||||
|
CheckInterval SecondsOrDuration `yaml:"check_interval"`
|
||||||
Name string
|
Name string
|
||||||
Command CommandOrShell
|
|
||||||
AlertDown []string `yaml:"alert_down"`
|
AlertDown []string `yaml:"alert_down"`
|
||||||
AlertUp []string `yaml:"alert_up"`
|
AlertUp []string `yaml:"alert_up"`
|
||||||
CheckInterval float64 `yaml:"check_interval"`
|
Command CommandOrShell
|
||||||
AlertAfter int16 `yaml:"alert_after"`
|
|
||||||
AlertEvery int16 `yaml:"alert_every"`
|
|
||||||
// Other values
|
// Other values
|
||||||
lastCheck time.Time
|
alertCount int16
|
||||||
lastOutput string
|
failureCount int16
|
||||||
alertCount int16
|
lastCheck time.Time
|
||||||
failureCount int16
|
lastSuccess time.Time
|
||||||
lastSuccess time.Time
|
lastOutput string
|
||||||
|
lastCheckDuration time.Duration
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||||
@@ -40,8 +43,9 @@ func (monitor Monitor) ShouldCheck() bool {
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
|
sinceLastCheck := time.Since(monitor.lastCheck)
|
||||||
return sinceLastCheck >= monitor.CheckInterval
|
|
||||||
|
return sinceLastCheck >= monitor.CheckInterval.Value()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check will run the command configured by the Monitor and return a status
|
// Check will run the command configured by the Monitor and return a status
|
||||||
@@ -54,11 +58,14 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
|||||||
cmd = ShellCommand(monitor.Command.ShellCommand)
|
cmd = ShellCommand(monitor.Command.ShellCommand)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checkStartTime := time.Now()
|
||||||
output, err := cmd.CombinedOutput()
|
output, err := cmd.CombinedOutput()
|
||||||
monitor.lastCheck = time.Now()
|
monitor.lastCheck = time.Now()
|
||||||
monitor.lastOutput = string(output)
|
monitor.lastOutput = string(output)
|
||||||
|
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
|
||||||
|
|
||||||
var alertNotice *AlertNotice
|
var alertNotice *AlertNotice
|
||||||
|
|
||||||
isSuccess := (err == nil)
|
isSuccess := (err == nil)
|
||||||
if isSuccess {
|
if isSuccess {
|
||||||
alertNotice = monitor.success()
|
alertNotice = monitor.success()
|
||||||
@@ -66,17 +73,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
|||||||
alertNotice = monitor.failure()
|
alertNotice = monitor.failure()
|
||||||
}
|
}
|
||||||
|
|
||||||
if LogDebug {
|
slog.Debugf("Command output: %s", monitor.lastOutput)
|
||||||
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
|
slog.OnErrWarnf(err, "Command result: %v", err)
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
if LogDebug {
|
|
||||||
log.Printf("DEBUG: Command result: %v", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
log.Printf(
|
slog.Infof(
|
||||||
"INFO: %s success=%t, alert=%t",
|
"%s success=%t, alert=%t",
|
||||||
monitor.Name,
|
monitor.Name,
|
||||||
isSuccess,
|
isSuccess,
|
||||||
alertNotice != nil,
|
alertNotice != nil,
|
||||||
@@ -85,15 +86,22 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
|||||||
return isSuccess, alertNotice
|
return isSuccess, alertNotice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor Monitor) isUp() bool {
|
// IsUp returns the status of the current monitor
|
||||||
|
func (monitor Monitor) IsUp() bool {
|
||||||
return monitor.alertCount == 0
|
return monitor.alertCount == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LastCheckMilliseconds gives the number of milliseconds the last check ran for
|
||||||
|
func (monitor Monitor) LastCheckMilliseconds() int64 {
|
||||||
|
return monitor.lastCheckDuration.Milliseconds()
|
||||||
|
}
|
||||||
|
|
||||||
func (monitor *Monitor) success() (notice *AlertNotice) {
|
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||||
if !monitor.isUp() {
|
if !monitor.IsUp() {
|
||||||
// Alert that we have recovered
|
// Alert that we have recovered
|
||||||
notice = monitor.createAlertNotice(true)
|
notice = monitor.createAlertNotice(true)
|
||||||
}
|
}
|
||||||
|
|
||||||
monitor.failureCount = 0
|
monitor.failureCount = 0
|
||||||
monitor.alertCount = 0
|
monitor.alertCount = 0
|
||||||
monitor.lastSuccess = time.Now()
|
monitor.lastSuccess = time.Now()
|
||||||
@@ -105,15 +113,14 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
|||||||
monitor.failureCount++
|
monitor.failureCount++
|
||||||
// If we haven't hit the minimum failures, we can exit
|
// If we haven't hit the minimum failures, we can exit
|
||||||
if monitor.failureCount < monitor.getAlertAfter() {
|
if monitor.failureCount < monitor.getAlertAfter() {
|
||||||
if LogDebug {
|
slog.Debugf(
|
||||||
log.Printf(
|
"%s failed but did not hit minimum failures. "+
|
||||||
"DEBUG: %s failed but did not hit minimum failures. "+
|
"Count: %v alert after: %v",
|
||||||
"Count: %v alert after: %v",
|
monitor.Name,
|
||||||
monitor.Name,
|
monitor.failureCount,
|
||||||
monitor.failureCount,
|
monitor.getAlertAfter(),
|
||||||
monitor.getAlertAfter(),
|
)
|
||||||
)
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -121,19 +128,20 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
|||||||
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
||||||
|
|
||||||
// Use alert cadence to determine if we should alert
|
// Use alert cadence to determine if we should alert
|
||||||
if monitor.AlertEvery > 0 {
|
switch {
|
||||||
// Handle integer number of failures before alerting
|
case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
|
||||||
if failureCount%monitor.AlertEvery == 0 {
|
|
||||||
notice = monitor.createAlertNotice(false)
|
|
||||||
}
|
|
||||||
} else if monitor.AlertEvery == 0 {
|
|
||||||
// Handle alerting on first failure only
|
// Handle alerting on first failure only
|
||||||
if failureCount == 0 {
|
if failureCount == 0 {
|
||||||
notice = monitor.createAlertNotice(false)
|
notice = monitor.createAlertNotice(false)
|
||||||
}
|
}
|
||||||
} else {
|
case *monitor.AlertEvery > 0:
|
||||||
|
// Handle integer number of failures before alerting
|
||||||
|
if failureCount%*monitor.AlertEvery == 0 {
|
||||||
|
notice = monitor.createAlertNotice(false)
|
||||||
|
}
|
||||||
|
default:
|
||||||
// Handle negative numbers indicating an exponential backoff
|
// Handle negative numbers indicating an exponential backoff
|
||||||
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
|
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
|
||||||
notice = monitor.createAlertNotice(false)
|
notice = monitor.createAlertNotice(false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -143,7 +151,7 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
|||||||
monitor.alertCount++
|
monitor.alertCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
return
|
return notice
|
||||||
}
|
}
|
||||||
|
|
||||||
func (monitor Monitor) getAlertAfter() int16 {
|
func (monitor Monitor) getAlertAfter() int16 {
|
||||||
@@ -152,6 +160,7 @@ func (monitor Monitor) getAlertAfter() int16 {
|
|||||||
if monitor.AlertAfter == 0 {
|
if monitor.AlertAfter == 0 {
|
||||||
return 1
|
return 1
|
||||||
}
|
}
|
||||||
|
|
||||||
return monitor.AlertAfter
|
return monitor.AlertAfter
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -160,6 +169,7 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
|
|||||||
if up {
|
if up {
|
||||||
return monitor.AlertUp
|
return monitor.AlertUp
|
||||||
}
|
}
|
||||||
|
|
||||||
return monitor.AlertDown
|
return monitor.AlertDown
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+51
-23
@@ -22,11 +22,13 @@ func TestMonitorIsValid(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.monitor.IsValid()
|
actual := c.monitor.IsValid()
|
||||||
if actual != c.expected {
|
if actual != c.expected {
|
||||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -43,9 +45,9 @@ func TestMonitorShouldCheck(t *testing.T) {
|
|||||||
name string
|
name string
|
||||||
}{
|
}{
|
||||||
{Monitor{}, true, "Empty"},
|
{Monitor{}, true, "Empty"},
|
||||||
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
|
{Monitor{lastCheck: timeNow, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "Just checked"},
|
||||||
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
|
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "-10s"},
|
||||||
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
|
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, true, "-20s"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
@@ -56,7 +58,7 @@ func TestMonitorShouldCheck(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// TestMonitorIsUp tests the Monitor.isUp()
|
// TestMonitorIsUp tests the Monitor.IsUp()
|
||||||
func TestMonitorIsUp(t *testing.T) {
|
func TestMonitorIsUp(t *testing.T) {
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
monitor Monitor
|
monitor Monitor
|
||||||
@@ -71,11 +73,13 @@ func TestMonitorIsUp(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
actual := c.monitor.isUp()
|
|
||||||
|
actual := c.monitor.IsUp()
|
||||||
if actual != c.expected {
|
if actual != c.expected {
|
||||||
t.Errorf("isUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
t.Errorf("IsUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -96,11 +100,13 @@ func TestMonitorGetAlertNames(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
actual := c.monitor.GetAlertNames(c.up)
|
actual := c.monitor.GetAlertNames(c.up)
|
||||||
if !EqualSliceString(actual, c.expected) {
|
if !EqualSliceString(actual, c.expected) {
|
||||||
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -119,12 +125,15 @@ func TestMonitorSuccess(t *testing.T) {
|
|||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := c.monitor.success()
|
notice := c.monitor.success()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -132,27 +141,32 @@ func TestMonitorSuccess(t *testing.T) {
|
|||||||
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
|
// TestMonitorFailureAlertAfter tests that alerts will not trigger until
|
||||||
// hitting the threshold provided by AlertAfter
|
// hitting the threshold provided by AlertAfter
|
||||||
func TestMonitorFailureAlertAfter(t *testing.T) {
|
func TestMonitorFailureAlertAfter(t *testing.T) {
|
||||||
|
var alertEvery int16 = 1
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
monitor Monitor
|
monitor Monitor
|
||||||
expectNotice bool
|
expectNotice bool
|
||||||
name string
|
name string
|
||||||
}{
|
}{
|
||||||
{Monitor{AlertAfter: 1}, true, "Empty"}, // Expects a notice because AlertEvery is unset, so only the first failure alerts
|
{Monitor{AlertAfter: 1}, true, "Empty"}, // Expects a notice because AlertEvery is unset, so only the first failure alerts
|
||||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"},
|
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: first failure"},
|
||||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"},
|
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery}, true, "Alert after 1: second failure"},
|
||||||
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"},
|
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: &alertEvery}, false, "Alert after 20: first failure"},
|
||||||
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"},
|
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 20th failure"},
|
||||||
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"},
|
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: &alertEvery}, true, "Alert after 20: 21st failure"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := c.monitor.failure()
|
notice := c.monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -160,6 +174,11 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
|
|||||||
// TestMonitorFailureAlertEvery tests that alerts will trigger
|
// TestMonitorFailureAlertEvery tests that alerts will trigger
|
||||||
// on the expected intervals
|
// on the expected intervals
|
||||||
func TestMonitorFailureAlertEvery(t *testing.T) {
|
func TestMonitorFailureAlertEvery(t *testing.T) {
|
||||||
|
var alertEvery0, alertEvery1, alertEvery2 int16
|
||||||
|
alertEvery0 = 0
|
||||||
|
alertEvery1 = 1
|
||||||
|
alertEvery2 = 2
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
monitor Monitor
|
monitor Monitor
|
||||||
expectNotice bool
|
expectNotice bool
|
||||||
@@ -174,20 +193,20 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
|
|||||||
|
|
||||||
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
|
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
|
||||||
*/
|
*/
|
||||||
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
|
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to nil
|
||||||
// Alert first time only, after 1
|
// Alert first time only, after 1
|
||||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"},
|
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery0}, true, "Alert first time only after 1: first failure"},
|
||||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
|
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: second failure"},
|
||||||
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
|
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery0}, false, "Alert first time only after 1: third failure"},
|
||||||
// Alert every time, after 1
|
// Alert every time, after 1
|
||||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"},
|
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: first failure"},
|
||||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
|
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: second failure"},
|
||||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
|
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery1}, true, "Alert every time after 1: third failure"},
|
||||||
// Alert every other time, after 1
|
// Alert every other time, after 1
|
||||||
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"},
|
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: first failure"},
|
||||||
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
|
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: second failure"},
|
||||||
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
|
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: &alertEvery2}, true, "Alert every other time after 1: third failure"},
|
||||||
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
|
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: &alertEvery2}, false, "Alert every other time after 1: fourth failure"},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
@@ -195,10 +214,12 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
|
|||||||
|
|
||||||
notice := c.monitor.failure()
|
notice := c.monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -206,6 +227,8 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
|
|||||||
// TestMonitorFailureExponential tests that alerts will trigger
|
// TestMonitorFailureExponential tests that alerts will trigger
|
||||||
// with an exponential backoff after repeated failures
|
// with an exponential backoff after repeated failures
|
||||||
func TestMonitorFailureExponential(t *testing.T) {
|
func TestMonitorFailureExponential(t *testing.T) {
|
||||||
|
var alertEveryExp int16 = -1
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
expectNotice bool
|
expectNotice bool
|
||||||
name string
|
name string
|
||||||
@@ -222,16 +245,19 @@ func TestMonitorFailureExponential(t *testing.T) {
|
|||||||
|
|
||||||
// Unlike previous tests, this one requires a static Monitor with repeated
|
// Unlike previous tests, this one requires a static Monitor with repeated
|
||||||
// calls to the failure method
|
// calls to the failure method
|
||||||
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
|
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: &alertEveryExp}
|
||||||
|
|
||||||
for _, c := range cases {
|
for _, c := range cases {
|
||||||
log.Printf("Testing case %s", c.name)
|
log.Printf("Testing case %s", c.name)
|
||||||
|
|
||||||
notice := monitor.failure()
|
notice := monitor.failure()
|
||||||
hasNotice := (notice != nil)
|
hasNotice := (notice != nil)
|
||||||
|
|
||||||
if hasNotice != c.expectNotice {
|
if hasNotice != c.expectNotice {
|
||||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -243,6 +269,7 @@ func TestMonitorCheck(t *testing.T) {
|
|||||||
hasNotice bool
|
hasNotice bool
|
||||||
lastOutput string
|
lastOutput string
|
||||||
}
|
}
|
||||||
|
|
||||||
cases := []struct {
|
cases := []struct {
|
||||||
monitor Monitor
|
monitor Monitor
|
||||||
expect expected
|
expect expected
|
||||||
@@ -290,6 +317,7 @@ func TestMonitorCheck(t *testing.T) {
|
|||||||
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
||||||
log.Printf("Case failed: %s", c.name)
|
log.Printf("Case failed: %s", c.name)
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Println("-----")
|
log.Println("-----")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
+6
-5
@@ -3,14 +3,14 @@ check_interval: 5
|
|||||||
|
|
||||||
monitors:
|
monitors:
|
||||||
- name: Fake Website
|
- name: Fake Website
|
||||||
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
|
command: ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
|
||||||
alert_down: [log_down, mailgun_down, sms_down]
|
alert_down: [log_down, mailgun_down, sms_down]
|
||||||
alert_up: [log_up, email_up]
|
alert_up: [log_up, email_up]
|
||||||
check_interval: 10 # Must be at minimum the global `check_interval`
|
check_interval: 10 # Must be at minimum the global `check_interval`
|
||||||
alert_after: 3
|
alert_after: 3
|
||||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||||
- name: Real Website
|
- name: Real Website
|
||||||
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
|
command: ["curl", "-s", "-o", "/dev/null", "https://google.com"]
|
||||||
alert_down: [log_down, mailgun_down, sms_down]
|
alert_down: [log_down, mailgun_down, sms_down]
|
||||||
alert_up: [log_up, email_up]
|
alert_up: [log_up, email_up]
|
||||||
check_interval: 5
|
check_interval: 5
|
||||||
@@ -23,7 +23,8 @@ alerts:
|
|||||||
log_up:
|
log_up:
|
||||||
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||||
email_up:
|
email_up:
|
||||||
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
command:
|
||||||
|
[sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||||
mailgun_down:
|
mailgun_down:
|
||||||
command: >
|
command: >
|
||||||
curl -s -X POST
|
curl -s -X POST
|
||||||
|
|||||||
+15
-3
@@ -11,6 +11,7 @@ set -e
|
|||||||
# To override, export DOCKER_HOST to a new hostname
|
# To override, export DOCKER_HOST to a new hostname
|
||||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||||
container_name="$1"
|
container_name="$1"
|
||||||
|
num_log_lines="$2"
|
||||||
|
|
||||||
# Curls Docker either using a socket or URL
|
# Curls Docker either using a socket or URL
|
||||||
function curl_docker {
|
function curl_docker {
|
||||||
@@ -31,21 +32,32 @@ function get_container_id {
|
|||||||
|
|
||||||
# Returns container JSON
|
# Returns container JSON
|
||||||
function inspect_container {
|
function inspect_container {
|
||||||
local container_id=$1
|
local container_id="$1"
|
||||||
curl_docker "containers/$container_id/json"
|
curl_docker "containers/$container_id/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Gets some lines from docker log
|
||||||
|
function get_logs {
|
||||||
|
container_id="$1"
|
||||||
|
num_lines="$2"
|
||||||
|
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
|
||||||
|
}
|
||||||
|
|
||||||
if [ -z "$container_name" ]; then
|
if [ -z "$container_name" ]; then
|
||||||
echo "Usage: $0 container_name"
|
echo "Usage: $0 container_name [num_log_lines]"
|
||||||
echo "Will exit with the last status code of continer with provided name"
|
echo "Will exit with the last status code of continer with provided name"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
container_id=$(get_container_id $container_name)
|
container_id=$(get_container_id "$container_name")
|
||||||
if [ -z "$container_id" ]; then
|
if [ -z "$container_id" ]; then
|
||||||
echo "ERROR: Could not find container with name: $container_name"
|
echo "ERROR: Could not find container with name: $container_name"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
|
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
|
||||||
|
|
||||||
|
if [ -n "$num_log_lines" ]; then
|
||||||
|
get_logs "$container_id" "$num_log_lines"
|
||||||
|
fi
|
||||||
|
|
||||||
exit "$exit_code"
|
exit "$exit_code"
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ set -e
|
|||||||
# To override, export DOCKER_HOST to a new hostname
|
# To override, export DOCKER_HOST to a new hostname
|
||||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||||
container_name="$1"
|
container_name="$1"
|
||||||
|
num_log_lines="$2"
|
||||||
|
|
||||||
# Curls Docker either using a socket or URL
|
# Curls Docker either using a socket or URL
|
||||||
function curl_docker {
|
function curl_docker {
|
||||||
@@ -35,8 +36,15 @@ function inspect_container {
|
|||||||
curl_docker "containers/$container_id/json"
|
curl_docker "containers/$container_id/json"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Gets some lines from docker log
|
||||||
|
function get_logs {
|
||||||
|
container_id="$1"
|
||||||
|
num_lines="$2"
|
||||||
|
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
|
||||||
|
}
|
||||||
|
|
||||||
if [ -z "$container_name" ]; then
|
if [ -z "$container_name" ]; then
|
||||||
echo "Usage: $0 container_name"
|
echo "Usage: $0 container_name [num_log_lines]"
|
||||||
echo "Will return results of healthcheck for continer with provided name"
|
echo "Will return results of healthcheck for continer with provided name"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
@@ -48,6 +56,10 @@ if [ -z "$container_id" ]; then
|
|||||||
fi
|
fi
|
||||||
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
|
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
|
||||||
|
|
||||||
|
if [ -n "$num_log_lines" ]; then
|
||||||
|
get_logs "$container_id" "$num_log_lines"
|
||||||
|
fi
|
||||||
|
|
||||||
case "$health" in
|
case "$health" in
|
||||||
null)
|
null)
|
||||||
echo "No healthcheck results"
|
echo "No healthcheck results"
|
||||||
|
|||||||
@@ -0,0 +1,12 @@
|
|||||||
|
---
|
||||||
|
check_interval: 1
|
||||||
|
default_alert_down: ["log_command"]
|
||||||
|
default_alert_after: 1
|
||||||
|
|
||||||
|
monitors:
|
||||||
|
- name: Command
|
||||||
|
command: ["echo", "$PATH"]
|
||||||
|
|
||||||
|
alerts:
|
||||||
|
log_command:
|
||||||
|
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
|
||||||
@@ -3,21 +3,23 @@ check_interval: 1
|
|||||||
|
|
||||||
monitors:
|
monitors:
|
||||||
- name: Command
|
- name: Command
|
||||||
command: ['echo', '$PATH']
|
command: ["echo", "$PATH"]
|
||||||
alert_down: ['log_command', 'log_shell']
|
alert_down: ["log_command", "log_shell"]
|
||||||
alert_every: 0
|
alert_every: 0
|
||||||
|
check_interval: 10s
|
||||||
- name: Shell
|
- name: Shell
|
||||||
command: >
|
command: >
|
||||||
echo 'Some string with stuff';
|
echo 'Some string with stuff';
|
||||||
echo 'another line';
|
echo 'another line';
|
||||||
echo $PATH;
|
echo $PATH;
|
||||||
exit 1
|
exit 1
|
||||||
alert_down: ['log_command', 'log_shell']
|
alert_down: ["log_command", "log_shell"]
|
||||||
alert_after: 5
|
alert_after: 5
|
||||||
alert_every: 0
|
alert_every: 0
|
||||||
|
check_interval: 1m
|
||||||
|
|
||||||
alerts:
|
alerts:
|
||||||
log_command:
|
log_command:
|
||||||
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
|
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
|
||||||
log_shell:
|
log_shell:
|
||||||
command: echo "Failure on {{.MonitorName}} User is $USER"
|
command: echo "Failure on {{.MonitorName}} User is $USER"
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import (
|
|||||||
// ShellCommand takes a string and executes it as a command using `sh`
|
// ShellCommand takes a string and executes it as a command using `sh`
|
||||||
func ShellCommand(command string) *exec.Cmd {
|
func ShellCommand(command string) *exec.Cmd {
|
||||||
shellCommand := []string{"sh", "-c", strings.TrimSpace(command)}
|
shellCommand := []string{"sh", "-c", strings.TrimSpace(command)}
|
||||||
//log.Printf("Shell command: %v", shellCommand)
|
|
||||||
return exec.Command(shellCommand[0], shellCommand[1:]...)
|
return exec.Command(shellCommand[0], shellCommand[1:]...)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -17,10 +17,12 @@ func EqualSliceString(a, b []string) bool {
|
|||||||
if len(a) != len(b) {
|
if len(a) != len(b) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
for i, val := range a {
|
for i, val := range a {
|
||||||
if val != b[i] {
|
if val != b[i] {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user