Compare commits

...

107 Commits

Author SHA1 Message Date
gyurix
6d2a78a266 Set alert_every to 0 for mdstat_raid monitor to prevent repeated alerts
continuous-integration/drone/push Build is passing
2026-04-14 10:55:17 +02:00
gyurix
8a765b2ab0 Refactor sendmail command to enhance recipient handling and error reporting
continuous-integration/drone/push Build is passing
2026-04-14 07:38:08 +02:00
gyurix
806a85a871 Specify full path for sendmail command in email alerts
continuous-integration/drone/push Build is passing
2026-04-13 11:31:48 +02:00
gyurix
c898454997 Enhance debug logging in sendmail function for improved clarity and output formatting
continuous-integration/drone/push Build is passing
2026-04-12 15:25:54 +02:00
gyurix
753eaeab10 Add debug logging to sendmail function for improved traceability
continuous-integration/drone/push Build is failing
2026-04-12 15:18:59 +02:00
gyurix
8e215b2574 test drone
continuous-integration/drone/push Build is passing
2026-04-12 09:58:31 +02:00
gyurix
e4ec84ea31 Refactor CI/CD pipeline and Dockerfile structure; remove obsolete build.yml and multi-stage Dockerfile, add default configuration for monitoring 2026-04-12 09:56:54 +02:00
Ian Fijolek
e262afdb1f Merge branch 'master' into next-major 2026-01-13 21:45:27 -08:00
Renovate Bot
a5268ae1f6 Update actions/setup-python action to v6 2026-01-14 05:45:09 +00:00
Renovate Bot
16ad16d873 Update actions/setup-go action to v6 2026-01-14 05:44:37 +00:00
Ian Fijolek
f4fb75610a Update variable interpolation for hcl 2026-01-13 21:43:57 -08:00
Ian Fijolek
0ae7c6dbdf Update default config file to config.hcl 2026-01-13 21:43:03 -08:00
Ian Fijolek
a06ed3540c Remove extra spaces in Makefile 2026-01-13 21:13:19 -08:00
Renovate Bot
200cfd1a2d Update actions/checkout action to v6 2026-01-07 00:02:31 +00:00
Ian Fijolek
bcbac39cad Add migration instructions 2026-01-05 16:32:59 -08:00
Ian Fijolek
afacf40ec8 Update build to better detect tags and versions 2026-01-05 16:20:51 -08:00
Ian Fijolek
c18e9c8771 Update readme with better default descriptions 2025-12-11 16:41:17 -08:00
Ian Fijolek
eb2987d3bc Tidy again and update readme 2025-12-11 16:37:02 -08:00
Ian Fijolek
945c1b1ce0 Update module path to v2 2025-12-11 16:34:18 -08:00
Ian Fijolek
b0ea3dc6d4 Bump go version 2025-12-11 16:26:05 -08:00
Ian Fijolek
5c97f2e5c4 Update defaults and add tests for them 2025-12-11 16:18:26 -08:00
Ian Fijolek
85afa3d9ef Merge branch 'master' into next-major 2025-12-11 16:17:38 -08:00
Renovate Bot
0a7aab7030 Update actions/checkout action to v6 2025-12-12 00:02:32 +00:00
IamTheFij
fb9d637614 Merge pull request 'Update actions/setup-go action to v6' (#13) from renovate/actions-setup-go-6.x into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/13
2025-12-11 23:09:33 +00:00
Renovate Bot
98be873220 Update actions/setup-go action to v6 2025-12-11 23:09:33 +00:00
IamTheFij
f59848fb61 Merge pull request 'Update actions/setup-python action to v6' (#14) from renovate/actions-setup-python-6.x into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/14
2025-12-11 23:09:11 +00:00
Renovate Bot
e74fe89cab Update actions/setup-python action to v6 2025-12-11 23:09:11 +00:00
IamTheFij
1bc8ab5dac Merge pull request 'Update alpine Docker tag to v3.23' (#7) from renovate/alpine-3.x into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/7
2025-12-11 22:33:11 +00:00
Renovate Bot
f21dce1cc6 Update alpine Docker tag to v3.23 2025-12-11 22:33:11 +00:00
IamTheFij
cfcdf04990 Merge pull request 'Update module github.com/prometheus/client_golang to v1.23.2' (#9) from renovate/github.com-prometheus-client_golang-1.x into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/9
2025-12-11 22:29:30 +00:00
Renovate Bot
9892af48d1 Update module github.com/prometheus/client_golang to v1.23.2 2025-12-11 20:46:36 +00:00
IamTheFij
c8e914d1b8 Merge pull request 'Switch to gitea actions' (#11) from gitea-actions into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/11
2025-12-11 19:07:28 +00:00
Ian Fijolek
845604c54c Use temporary hadolint hook location 2025-12-11 11:03:39 -08:00
Ian Fijolek
49acca1c79 Switch to gitea actions 2025-12-10 23:17:42 -08:00
IamTheFij
2bdafd908d Merge pull request 'Update golang Docker tag to v1.25' (#8) from renovate/golang-1.x into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/8
2025-12-10 19:27:55 +00:00
Renovate Bot
b349ada44e Update golang Docker tag to v1.25 2025-12-10 19:23:55 +00:00
IamTheFij
fbf92d924c Merge pull request 'Configure Renovate' (#6) from renovate/configure into master
Reviewed-on: https://git.iamthefij.com/iamthefij/minitor-go/pulls/6
2025-12-10 19:19:21 +00:00
Renovate Bot
fd292b005c Add renovate.json 2025-12-10 18:23:06 +00:00
Ian Fijolek
3ef06fb78d Stop getting errors when tzdata updates 2025-02-13 13:28:06 -08:00
Ian Fijolek
4aef7b7458 Remove redundant logging 2025-02-13 12:17:59 -08:00
Ian Fijolek
be2e8121c5 Refactor validation for alert and monitor to return errors 2025-02-13 12:17:59 -08:00
Ian Fijolek
9e20c00dde Move monitor init to it's own method and refactor config validate to return err 2025-02-13 12:17:59 -08:00
Ian Fijolek
3fb418151b Update readme and update some test files to be better examples 2025-02-13 12:17:59 -08:00
Ian Fijolek
df1c7aa74b Refactor test package and some field types
Fairly big test refactor and changing some of the fields from pointers
2025-02-13 12:17:59 -08:00
Ian Fijolek
7c72eabd6b Initial step of hcl migration 2025-02-13 12:17:46 -08:00
IamTheFij
0535bdf156 Fix incorrect alertCount 2025-02-13 12:12:01 -08:00
Ian Fijolek
03f0ab69fe Add documentation for default_alert_every 2025-02-13 11:26:39 -08:00
Ian Fijolek
9ec62528d9 Make linters happy 2024-11-14 13:13:16 -08:00
Ian Fijolek
312821fa8d Add pycompat warning 2024-11-14 13:09:23 -08:00
Ian Fijolek
32745c816c Improve test structures using subtests 2024-11-14 11:35:26 -08:00
Ian Fijolek
3f6c8f5a22 Breaking: Remove 'SecondsOrDuration' for check_interval
Now requires an explicit duration unit. Eg. 30s
2024-11-14 11:18:39 -08:00
Ian Fijolek
67d7e0574e Breaking: Remove python compat flag 2024-11-14 11:18:05 -08:00
Ian Fijolek
7604138c9e Use buildx 2024-04-03 12:15:11 -07:00
Ian Fijolek
f58b4c1495 Adds ability to run specified alerts on startup
This is helpful to determine if your alerts are valid before an actual failure
2024-04-03 12:03:17 -07:00
Ian Fijolek
6a2b44673e Upgrade prometheus client and protobuf 2024-04-03 11:28:01 -07:00
Ian Fijolek
01cca50532 Add tzdata
Allows setting container timezone using TZ env variable
2023-08-11 06:20:35 -07:00
Ian Fijolek
2789aa63e4 More loosely pins apk packages 2023-08-11 06:20:15 -07:00
Ian Fijolek
37db4b2db0 Update error string when failing to send alert
Wrap both originating errors
2023-08-10 16:23:02 -04:00
Ian Fijolek
41a1dbeceb Add date format functions 2023-08-10 16:22:30 -04:00
Ian Fijolek
c02d64d674 Update go to 1.20 2023-08-10 16:21:33 -04:00
Ian Fijolek
46f4561bea Update alpine and system package versions
Bump to alpine 3.18
2023-06-14 16:52:04 -07:00
Ian Fijolek
a1e0e9698b Add dig and nslookup 2023-05-05 14:07:53 -07:00
Ian Fijolek
ded4e129a1 Switch from deprecated ioutil 2023-04-18 16:17:06 -07:00
Ian Fijolek
95cb24ac04 Upgrade golangci-lint pre-commit hook 2023-04-18 16:16:53 -07:00
Ian Fijolek
e6447b615f Upgrade some dependencies 2023-04-18 16:09:41 -07:00
Ian Fijolek
2680eabd40 Rebuild when mod files change 2023-04-18 16:09:30 -07:00
Ian Fijolek
23340e823f Update go version in go.mod 2023-04-18 16:02:04 -07:00
Ian Fijolek
99b8723abc Add curl script to simplify http healthchecks 2023-04-18 15:56:59 -07:00
Ian Fijolek
7d87c3d036 Add default values for AlertEvery
There is also a test error corrected in TestMonitorFailureAlertEvery
where the same test conditions were repeated twice.
2022-12-19 15:49:32 -08:00
Ian Fijolek
deec04bf0d Allow setting of global defaults for some values
This helps with reducing redundant config.

Note: There is no default for `alert_every` because the zero value has a
meaning and cannot be interpreted as an omission.
2022-12-19 15:49:32 -08:00
Ian Fijolek
958446050f Update linters 2022-12-19 15:34:47 -08:00
Ian Fijolek
88e94642d9 Remove some hooks included in golangci-lint and upgrade existing 2022-06-07 21:39:18 -07:00
Ian Fijolek
bc83a51907 Switch pre-commit url for golang 2022-04-04 20:12:01 -07:00
Ian Fijolek
08b8932331 Update curl version 2022-01-24 16:08:18 -08:00
Ian Fijolek
9072d97bb8 Make linters happy 2022-01-24 10:39:53 -08:00
Ian Fijolek
cdd8a69669 Update go version 2021-12-01 14:47:58 -08:00
Ian Fijolek
3c14a02770 Continue checking all monitors after sending alert
Previously this was mistakenly returning after sending an alert. Now
all alerts will be sent unless there is an exception on one of them.
2021-09-02 10:20:04 -07:00
Ian Fijolek
328ea83c25 Some linting cleanup 2021-09-02 10:19:03 -07:00
Ian Fijolek
ce986e8d1d Roll back to alpine:3.12
Looks like there is a clock issue with raspbian

https://wiki.alpinelinux.org/wiki/Release_Notes_for_Alpine_3.13.0#time64_requirements
2021-05-12 19:06:41 -07:00
Ian Fijolek
31a4b484bf Merge branch 'duration-intervals' 2021-05-12 18:32:12 -07:00
Ian Fijolek
49e3635819 Add backwards compatility explanation in Readme 2021-05-12 16:37:59 -07:00
Ian Fijolek
444d060736 Remove qemu-user-static from Dockerfile and update alpine
My build machine now has proper qemu support added, so this is not needed
2021-05-12 23:22:24 +00:00
Ian Fijolek
860c2cdf43 Add custom type to parse out seconds as int and durations as strings 2021-05-12 10:33:42 -07:00
Ian Fijolek
befea7375f Add check runtime metric 2021-05-11 10:41:39 -07:00
Ian Fijolek
04395fa693 Add duration parsing tests 2021-05-11 10:41:39 -07:00
Ian Fijolek
bdf7355fa7 Add duration parsing for intervals 2021-05-11 10:41:39 -07:00
Ian Fijolek
30c2c7d6b2 Add Dockerfile linting back in 2021-05-10 21:53:26 -07:00
Ian Fijolek
5f250f17a8 Add more liniting and update to pass 2021-05-10 21:53:26 -07:00
Ian Fijolek
fda9e1bfc3 Replace log with slog 2021-05-10 21:53:26 -07:00
Ian Fijolek
f0e179851f Update linting and a test case 2021-01-08 18:31:22 -05:00
Ian Fijolek
9e124803da Add release uploads 2021-01-08 18:13:48 -05:00
Ian Fijolek
2c4543a7bc Update go version to 1.15 2021-01-08 18:13:34 -05:00
Ian Fijolek
a1b906b94a Update for go 1.15 2020-11-16 15:56:31 -08:00
Ian Fijolek
0a5be250b5 Scripts: Add echoing log lines to helper scripts
Rather than only returning the status of whether or not a container is
healhthy, the helper scripts will now optionally echo some of the latest
log lines.
2020-11-16 15:52:21 -08:00
Ian Fijolek
88f77aa27c Fix Makefile comment 2020-11-16 15:51:41 -08:00
Ian Fijolek
67c2375bba Remove docker linting for now
Drone check doesn't pass. Need to install docker there
2020-07-14 17:29:54 -07:00
Ian Fijolek
aad9eaa32f Update exported status metric to properly reflect alerting status of a monitor
It was using the result of the individual check and not the monitor as a whole
2020-07-14 17:09:56 -07:00
Ian Fijolek
5dc5ba5257 Add docker linting 2020-07-14 17:08:48 -07:00
Ian Fijolek
4aff294739 Set overrided version in drone config 2020-07-07 12:15:53 -07:00
Ian Fijolek
0684b15a44 Update logic for setting version
I noticed that versions were not being properly dervied from the git
tags. This fixes that in a simpler way by allowing git to describe the
current commit with tags, commits, shas, and a dirty maker.
2020-07-07 10:51:13 -07:00
Ian Fijolek
d3826dacde Update drone to use new linux only target 2020-07-06 20:33:02 -07:00
Ian Fijolek
f8e40c643c Move static binaries to dist/ for easier publishing
This will make it easier to publish them to Github or Gitea releases later.

To avoid making the Makefile super complex, this patch also makes use of
variables to simplify the Makefile as well.
2020-07-06 20:15:21 -07:00
Ian Fijolek
cffbbd734a Make default log alert conditional
Allow using the default `log` alert for both up and down alerts using
Go's templating conditionals. Following this example can do away with
the need for an up and down version of every alert.
2020-06-19 09:51:42 -07:00
Ian Fijolek
ad6f3be6ec Update README with more detailed running instructions from prior project 2020-02-19 22:13:30 -08:00
Ian Fijolek
ae30f477f7 Add ability to customize metrics port 2020-02-19 22:13:07 -08:00
Ian Fijolek
9dcd8ebf12 Update README to correct differences between py and go versions 2020-02-19 21:56:01 -08:00
Ian Fijolek
11af700618 Merge branch 'minitor-py-compat-rebase' 2020-02-19 21:21:40 -08:00
42 changed files with 1909 additions and 1186 deletions
+40 -101
View File
@@ -1,114 +1,53 @@
---
kind: pipeline kind: pipeline
name: test type: kubernetes
name: default
steps: node_selector:
zone: dev
- name: test
image: golang:1.12
commands:
- make build
- make test
- name: check
image: python:3
commands:
- pip install pre-commit==1.20.0
- make check
- name: notify
image: drillster/drone-email
settings:
host:
from_secret: SMTP_HOST
username:
from_secret: SMTP_USER
password:
from_secret: SMTP_PASS
from: drone@iamthefij.com
when:
status: [changed, failure]
---
kind: pipeline
name: publish
depends_on:
- test
trigger: trigger:
event: event:
- push - push
- tag - tag
refs:
- refs/heads/master workspace:
- refs/tags/v* path: /drone/src
steps: steps:
- name: build all binaries - name: pull image to dockerhub
image: golang:1.12 image: docker.io/owncloudci/drone-docker-buildx:4
commands: privileged: true
- make all
- name: push image - arm
image: plugins/docker
settings: settings:
repo: iamthefij/minitor-go cache-from: [ "safebox/minitor" ]
auto_tag: true repo: safebox/minitor
auto_tag_suffix: linux-arm tags: latest
username: username:
from_secret: docker_username from_secret: dockerhub-username
password: password:
from_secret: docker_password from_secret: dockerhub-password
build_args: platforms:
- ARCH=arm - linux/amd64
- REPO=arm32v7 - linux/arm64
- name: push image - arm64
image: plugins/docker
settings:
repo: iamthefij/minitor-go
auto_tag: true
auto_tag_suffix: linux-arm64
username:
from_secret: docker_username
password:
from_secret: docker_password
build_args:
- ARCH=arm64
- REPO=arm64v8
- name: push image - amd64
image: plugins/docker
settings:
repo: iamthefij/minitor-go
auto_tag: true
auto_tag_suffix: linux-amd64
username:
from_secret: docker_username
password:
from_secret: docker_password
- name: publish manifest
image: plugins/manifest
settings:
spec: manifest.tmpl
auto_tag: true
ignore_missing: true
username:
from_secret: docker_username
password:
from_secret: docker_password
- name: notify
image: drillster/drone-email
settings:
host:
from_secret: SMTP_HOST
username:
from_secret: SMTP_USER
password:
from_secret: SMTP_PASS
from: drone@iamthefij.com
when: when:
status: [changed, failure] event:
- tag
- name: build multiarch from dev
image: docker.io/owncloudci/drone-docker-buildx:4
privileged: true
settings:
cache-from: [ "registry.dev.format.hu/minitor" ]
registry: registry.dev.format.hu
repo: registry.dev.format.hu/minitor
tags: latest
dockerfile: Dockerfile
username:
from_secret: dev-hu-registry-username
password:
from_secret: dev-hu-registry-password
platforms:
- linux/amd64
- linux/arm64
when:
event:
- push
Vendored
+3 -2
View File
@@ -14,8 +14,9 @@
# User configuration # User configuration
config.yml config.yml
config.hcl
# Output binary # Output binary
minitor minitor
minitor-linux-* minitor-go
minitor-darwin-amd64 dist/
+52
View File
@@ -0,0 +1,52 @@
version: "2"
linters:
enable:
- errname
- errorlint
- exhaustive
- goprintffuncname
- misspell
- mnd
- tagliatelle
- testpackage
- thelper
- tparallel
- unconvert
- wrapcheck
- wsl
disable:
- gochecknoglobals
settings:
gosec:
excludes:
- G204
tagliatelle:
case:
rules:
json: snake
yaml: snake
exclusions:
generated: lax
presets:
- comments
- common-false-positives
- legacy
- std-error-handling
rules:
- linters:
- gosec
path: _test\.go
paths:
- third_party$
- builtin$
- examples$
formatters:
enable:
- gofumpt
- goimports
exclusions:
generated: lax
paths:
- third_party$
- builtin$
- examples$
+8 -7
View File
@@ -1,7 +1,7 @@
--- ---
repos: repos:
- repo: https://github.com/pre-commit/pre-commit-hooks - repo: https://github.com/pre-commit/pre-commit-hooks
rev: v2.4.0 rev: v6.0.0
hooks: hooks:
- id: check-added-large-files - id: check-added-large-files
- id: check-yaml - id: check-yaml
@@ -10,10 +10,11 @@ repos:
- id: trailing-whitespace - id: trailing-whitespace
- id: end-of-file-fixer - id: end-of-file-fixer
- id: check-merge-conflict - id: check-merge-conflict
- repo: git://github.com/dnephin/pre-commit-golang - repo: https://github.com/golangci/golangci-lint
rev: v0.3.5 rev: v2.7.2
hooks: hooks:
- id: go-fmt - id: golangci-lint
- id: go-imports - repo: https://github.com/hadolint/hadolint
# - id: gometalinter rev: refs/pull/1152/head
# - id: golangci-lint hooks:
- id: hadolint-github
+112 -10
View File
@@ -1,27 +1,129 @@
ARG REPO=library FROM golang:1.25 AS gomail-builder
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static ARG TARGETARCH=amd64
FROM ${REPO}/alpine:3.10 ARG TARGETOS=linux
WORKDIR /gomail
# Generate main.go for the minimal sendmail replacement. The program:
#   - collects recipients from positional arguments and, when an option
#     beginning with -t is given, from the To/Cc/Bcc headers read on stdin;
#   - relays the message read from stdin via SMTP to SMTP_RELAY:SMTP_PORT
#     (defaults 172.17.0.2:25) using EMAIL_FROM as the envelope sender;
#   - exits 0 without sending when EMAIL_FROM is unset, and exits 1 on any
#     read, parse, usage, or SMTP error;
#   - prints diagnostics to stderr when DEBUG is non-empty.
# NOTE(review): keep the echoed Go source free of single quotes and
# backslashes; some /bin/sh echo implementations interpret escape sequences.
RUN { \
echo 'package main'; \
echo ''; \
echo 'import ('; \
echo ' "fmt"'; \
echo ' "io"'; \
echo ' "net/mail"'; \
echo ' "net/smtp"'; \
echo ' "os"'; \
echo ' "strings"'; \
echo ')'; \
echo ''; \
echo 'func main() {'; \
echo ' // Only arguments that begin with -t enable header parsing, so a recipient'; \
echo ' // address that merely contains -t (such as ops-team@example.com) is not'; \
echo ' // mistaken for the flag. Other options are ignored.'; \
echo ' readHeaders := false'; \
echo ' recipients := []string{}'; \
echo ' for _, arg := range os.Args[1:] {'; \
echo ' switch {'; \
echo ' case strings.HasPrefix(arg, "-t"):'; \
echo ' readHeaders = true'; \
echo ' case !strings.HasPrefix(arg, "-"):'; \
echo ' recipients = append(recipients, arg)'; \
echo ' }'; \
echo ' }'; \
echo ''; \
echo ' body, err := io.ReadAll(os.Stdin)'; \
echo ' if err != nil {'; \
echo ' fmt.Fprintln(os.Stderr, err)'; \
echo ' os.Exit(1)'; \
echo ' }'; \
echo ''; \
echo ' if readHeaders {'; \
echo ' msg, parseErr := mail.ReadMessage(strings.NewReader(string(body)))'; \
echo ' if parseErr != nil {'; \
echo ' fmt.Fprintln(os.Stderr, parseErr)'; \
echo ' os.Exit(1)'; \
echo ' }'; \
echo ' for _, hdr := range []string{"To", "Cc", "Bcc"} {'; \
echo ' if val := msg.Header.Get(hdr); val != "" {'; \
echo ' addrs, addrErr := mail.ParseAddressList(val)'; \
echo ' if addrErr != nil {'; \
echo ' fmt.Fprintln(os.Stderr, addrErr)'; \
echo ' os.Exit(1)'; \
echo ' }'; \
echo ' for _, addr := range addrs {'; \
echo ' recipients = append(recipients, addr.Address)'; \
echo ' }'; \
echo ' }'; \
echo ' }'; \
echo ' }'; \
echo ''; \
echo ' if len(recipients) == 0 {'; \
echo ' fmt.Fprintln(os.Stderr, "usage: sendmail [-t] recipient...")'; \
echo ' os.Exit(1)'; \
echo ' }'; \
echo ''; \
echo ' relay := os.Getenv("SMTP_RELAY")'; \
echo ' if relay == "" {'; \
echo ' relay = "172.17.0.2"'; \
echo ' }'; \
echo ''; \
echo ' port := os.Getenv("SMTP_PORT")'; \
echo ' if port == "" {'; \
echo ' port = "25"'; \
echo ' }'; \
echo ''; \
echo ' sender := os.Getenv("EMAIL_FROM")'; \
echo ' if sender == "" {'; \
echo ' fmt.Fprintln(os.Stderr, "[sendmail] EMAIL_FROM is not set, skipping")'; \
echo ' os.Exit(0)'; \
echo ' }'; \
echo ''; \
echo ' debug := os.Getenv("DEBUG") != ""'; \
echo ' if debug {'; \
echo ' fmt.Fprintln(os.Stderr, fmt.Sprintf("[sendmail] relay=%s port=%s sender=%s recipients=%v", relay, port, sender, recipients))'; \
echo ' fmt.Fprintln(os.Stderr, "[sendmail] body:")'; \
echo ' fmt.Fprintln(os.Stderr, string(body))'; \
echo ' }'; \
echo ''; \
echo ' if err = smtp.SendMail(relay+":"+port, nil, sender, recipients, body); err != nil {'; \
echo ' fmt.Fprintln(os.Stderr, err)'; \
echo ' os.Exit(1)'; \
echo ' }'; \
echo ' if debug {'; \
echo ' fmt.Fprintln(os.Stderr, "[sendmail] sent successfully")'; \
echo ' }'; \
echo '}'; \
} > main.go
RUN go mod init gomail && \
CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -o /usr/local/bin/sendmail .
# Copying all qemu files because amd64 doesn't exist and cannot conditionally copy
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
FROM golang:1.25 AS builder
WORKDIR /app
COPY ./go.mod ./go.sum /app/
RUN go mod download
COPY ./*.go /app/
RUN rm -f /app/gomail.go
ARG TARGETARCH=amd64
ARG TARGETOS=linux
ARG VERSION=dev
RUN CGO_ENABLED=0 GOOS=${TARGETOS} GOARCH=${TARGETARCH} go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM alpine:3.23
RUN mkdir /app RUN mkdir /app
WORKDIR /app/ WORKDIR /app/
# Copy minitor in
COPY --from=builder /app/minitor .
# Copy sendmail (gomail) in
COPY --from=gomail-builder /usr/local/bin/sendmail /usr/local/bin/sendmail
# Add common checking tools # Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6 # hadolint ignore=DL3018
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata
# Add minitor user for running as non-root # Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor RUN addgroup -S minitor && adduser -S minitor -G minitor
# Copy scripts # Copy scripts
COPY ./scripts /app/scripts COPY ./scripts /app/scripts
COPY default_config.hcl /app/config.hcl
RUN chmod -R 755 /app/scripts RUN chmod -R 755 /app/scripts
# Copy minitor in
ARG ARCH=amd64
COPY ./minitor-linux-${ARCH} ./minitor
# Drop to non-root user # Drop to non-root user
USER minitor USER minitor
-41
View File
@@ -1,41 +0,0 @@
ARG REPO=library
FROM golang:1.12-alpine AS builder
RUN apk add --no-cache git=~2
RUN mkdir /app
WORKDIR /app
COPY ./go.mod ./go.sum /app/
RUN go mod download
COPY ./*.go /app/
ARG ARCH=amd64
ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM ${REPO}/alpine:3.10
RUN mkdir /app
WORKDIR /app/
# Copy minitor in
COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
# Copy scripts
COPY ./scripts /app/scripts
RUN chmod -R 755 /app/scripts
# Drop to non-root user
USER minitor
ENTRYPOINT [ "./minitor" ]
# vim: set filetype=dockerfile:
+37 -43
View File
@@ -1,36 +1,43 @@
DOCKER_TAG ?= minitor-go-${USER} DOCKER_TAG ?= minitor-go-${USER}
GIT_TAG_NAME := $(shell git tag -l --contains HEAD) VERSION ?= $(shell git describe --tags --dirty)
GIT_SHA := $(shell git rev-parse HEAD) GOFILES = *.go go.mod go.sum
VERSION := $(if $(GIT_TAG_NAME),$(GIT_TAG_NAME),$(GIT_SHA)) # Multi-arch targets are generated from this
TARGET_ALIAS = minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 minitor-darwin-amd64
TARGETS = $(addprefix dist/,$(TARGET_ALIAS))
#
# Default make target will run tests
.DEFAULT_GOAL = test
# Build all static Minitor binaries
.PHONY: all .PHONY: all
all: minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 all: $(TARGETS)
.PHONY: default # Build all static Linux Minitor binaries. Used in Docker images
default: test .PHONY: all-linux
all-linux: $(filter dist/minitor-linux-%,$(TARGETS))
# Build minitor for the current machine
minitor: $(GOFILES)
@echo Version: $(VERSION)
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
.PHONY: build .PHONY: build
build: minitor build: minitor
minitor: # Run minitor for the current machine
@echo Version: $(VERSION)
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
.PHONY: run .PHONY: run
run: minitor build run: minitor
./minitor -debug ./minitor -debug
.PHONY: run-metrics .PHONY: run-metrics
run-metrics: minitor build run-metrics: minitor
./minitor -debug -metrics ./minitor -debug -metrics
# Run all tests
.PHONY: test .PHONY: test
test: test:
go test -coverprofile=coverage.out go test -coverprofile=coverage.out
@echo
go tool cover -func=coverage.out go tool cover -func=coverage.out
@echo
@# Check min coverage percentage
@go tool cover -func=coverage.out | awk -v target=80.0% \ @go tool cover -func=coverage.out | awk -v target=80.0% \
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }' '/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
@@ -39,7 +46,7 @@ test:
install-hooks: install-hooks:
pre-commit install --install-hooks pre-commit install --install-hooks
# Checks files for encryption # Runs pre-commit checks on files
.PHONY: check .PHONY: check
check: check:
pre-commit run --all-files pre-commit run --all-files
@@ -47,9 +54,8 @@ check:
.PHONY: clean .PHONY: clean
clean: clean:
rm -f ./minitor rm -f ./minitor
rm -f ./minitor-linux-*
rm -f ./minitor-darwin-amd64
rm -f ./coverage.out rm -f ./coverage.out
rm -fr ./dist
.PHONY: docker-build .PHONY: docker-build
docker-build: docker-build:
@@ -57,39 +63,27 @@ docker-build:
.PHONY: docker-run .PHONY: docker-run
docker-run: docker-build docker-run: docker-build
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG) docker run --rm -v $(shell pwd)/sample-config.hcl:/root/config.hcl $(DOCKER_TAG)
## Multi-arch targets ## Multi-arch targets
$(TARGETS): $(GOFILES)
# Arch specific go build targets mkdir -p ./dist
minitor-darwin-amd64: GOOS=$(word 2, $(subst -, ,$(@))) GOARCH=$(word 3, $(subst -, ,$(@))) CGO_ENABLED=0 \
GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \ go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-darwin-amd64 -o $@
minitor-linux-amd64: .PHONY: $(TARGET_ALIAS)
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \ $(TARGET_ALIAS):
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \ $(MAKE) $(addprefix dist/,$@)
-o minitor-linux-amd64
minitor-linux-arm:
GOOS=linux GOARCH=arm CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-linux-arm
minitor-linux-arm64:
GOOS=linux GOARCH=arm64 CGO_ENABLED=0 \
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
-o minitor-linux-arm64
# Arch specific docker build targets # Arch specific docker build targets
.PHONY: docker-build-arm .PHONY: docker-build-arm
docker-build-arm: minitor-linux-arm docker-build-arm: dist/minitor-linux-arm
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm docker build --platform linux/arm . -t ${DOCKER_TAG}-linux-arm
.PHONY: docker-build-arm .PHONY: docker-build-arm64
docker-build-arm64: minitor-linux-arm64 docker-build-arm64: dist/minitor-linux-arm64
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64 docker build --platform linux/arm64 . -t ${DOCKER_TAG}-linux-arm64
# Cross run on host architecture # Cross run on host architecture
.PHONY: docker-run-arm .PHONY: docker-run-arm
+200 -59
View File
@@ -1,79 +1,220 @@
# minitor-go # [minitor-go](https://git.iamthefij.com/iamthefij/minitor-go)
A reimplementation of [Minitor](https://git.iamthefij.com/iamthefij/minitor) in Go A minimal monitoring system
Minitor is already a minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a large footprint. Thus Go feels like a better fit for the project, longer term. ## What does it do?
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features. Minitor accepts an HCL configuration file with a set of commands to run and a set of alerts to execute when those commands fail. Minitor has a narrow feature set and instead follows a principle to outsource to other command line tools when possible. Thus, it relies on other command line tools to do checks and issue alerts. To make getting started a bit easier, Minitor includes a few scripts to help with common tasks.
## Differences from Python version ## But why?
I'm running a few small services and found Sensu, Consul, Nagios, etc. to all be far too complicated for my usecase.
Templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct, `AlertNotice` defined in `alert.go` and the built in Go templating format. Eg. ## So how do I use it?
minitor-py: ### Running
```yaml
alerts: Install and execute with:
log_command:
command: ['echo', '{monitor_name}'] ```bash
log_shell: go install github.com/iamthefij/minitor-go/v2@latest
command: 'echo {monitor_name}' minitor
``` ```
minitor-go: If locally developing you can use:
```yaml
alerts: ```bash
log_command: make run
command: ['echo', '{{.MonitorName}}']
log_shell:
command: 'echo {{.MonitorName}}'
``` ```
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used and continuations should not. It will read the contents of `sample-config.hcl` and begin its loop. You could also run it directly and provide a new config file via the `-config` argument.
minitor-py:
```yaml #### Docker
alerts:
log_shell: You can pull this repository directly from Docker:
command: >
echo "line 1" ```bash
echo "line 2" docker pull iamthefij/minitor-go:latest
echo "continued" \
"line"
``` ```
minitor-go: The Docker image uses a default `config.hcl` copied from `sample-config.hcl`. This won't really do anything for you, so when you run the Docker image, you should supply your own `config.hcl` file:
```yaml
alerts: ```bash
log_shell: docker run -v $PWD/sample-config.hcl:/app/config.hcl iamthefij/minitor-go:latest
command: >
echo "line 1";
echo "line 2";
echo "continued"
"line"
``` ```
## To do Images are provided for `amd64`, `arm`, and `arm64` architectures.
There are two sets of task lists. The first is to get rough parity on key features with the Python version. The second is to make some improvements to the framework.
Pairity: You can configure the timezone for the container by passing a `TZ` env variable. Eg. `TZ=America/Los_Angeles`.
- [x] Run monitor commands ## Configuring
- [x] Run monitor commands in a shell
- [x] Run alert commands
- [x] Run alert commands in a shell
- [x] Allow templating of alert commands
- [x] Implement Prometheus client to export metrics
- [x] Test coverage
- [x] Integration testing (manual or otherwise)
- [x] Allow commands and shell commands in the same config key
Improvement (potentially breaking): In this repo, you can explore the `sample-config.hcl` file for an example, but the general structure is as follows. If you are passing environment variables to your commands or alerts, you should be aware that `${VAR}` syntax is reserved for HCL variable interpolation. To avoid issues, you can use `$${VAR}` syntax to escape the `$` character, or simply use `$VAR`.
- [ ] Implement leveled logging (maybe glog or logrus) ```hcl
- [ ] Consider switching from YAML to TOML
- [ ] Consider value of templating vs injecting values into Env variables The global configurations are:
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging
- [ ] Async checking |key|value|
- [ ] Use durations rather than seconds checked in event loop |---|---|
- [ ] Revisit metrics and see if they all make sense |`check_interval`|Maximum frequency to run checks for each monitor as duration, eg. 1m2s.|
|`default_alert_after`|A default value used as an `alert_after` value for a monitor if not specified. Defaults to 1, which will alert immediately.|
|`default_alert_every`|A default value used as an `alert_every` value for a monitor if not specified. Defaults to -1, which will re-alert exponentially.|
|`default_alert_down`|Default down alerts to be used by a monitor in case none are provided.|
|`default_alert_up`|Default up alerts to be used by a monitor in case none are provided.|
|`monitor`|block listing monitors. Detailed description below|
|`alert`|List of all alerts. Detailed description below|
### Monitors
Represent your monitors as blocks with a label indicating the name of the monitor.
```hcl
monitor "example" {
command = ["echo", "Hello, World!"]
alert_down = ["log"]
alert_up = ["log"]
check_interval = "1m"
alert_after = 1
alert_every = -1
}
```
Each monitor allows the following configuration:
|key|value|
|---|---|
|`name`|Name of the monitor running. This will show up in messages and logs.|
|`command`|A list of strings representing a command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `shell_command`|
|`shell_command`|A single string that represents a shell command to be executed. This command's exit value will determine whether the check is successful. This value is mutually exclusive to `command`|
|`alert_down`|A list of Alerts to be triggered when the monitor is in a "down" state|
|`alert_up`|A list of Alerts to be triggered when the monitor moves to an "up" state|
|`check_interval`|The interval at which this monitor should be checked. This must be greater than the global `check_interval` value|
|`alert_after`|Allows specifying the number of failed checks before an alert should be triggered. A value of 1 will start sending alerts after the first failure.|
|`alert_every`|Allows specifying how often an alert should be retriggered. There are a few magic numbers here. Defaults to `-1` for an exponential backoff. Setting to `0` disables re-alerting. Positive values will allow retriggering after the specified number of checks|
### Alerts
Represent your alerts as blocks with a label indicating the name of the alert. The name will be used in your monitor setup in `alert_down` and `alert_up`.
```hcl
monitor "example" {
command = ["false"]
alert_down = ["log"]
}
alert "log" {
shell_command = "echo '{{.MonitorName}} is down!'"
}
```
Each alert allows the following configuration:
|key|value|
|---|---|
|`command`|Specifies the command that should be executed in exec form. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `shell_command`|
|`shell_command`|Specifies a shell command as a single string. This is the command that will be run when the alert is executed. This can be templated with environment variables or the variables shown in the table below. This value is mutually exclusive to `command`|
Also, when alerts are executed, they will be passed through Go's format function with arguments for some attributes of the Monitor. The following monitor specific variables can be referenced using Go formatting syntax:
|token|value|
|---|---|
|`{{.AlertCount}}`|Number of times this monitor has alerted|
|`{{.FailureCount}}`|The total number of sequential failed checks for this monitor|
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|`{{.LastSuccess}}`|The datetime of the last successful check as a go Time struct|
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
To provide flexible formatting, the following non-standard functions are available in templates:
|func|description|
|---|---|
|`ANSIC <Time>`|Formats provided time in ANSIC format|
|`UnixDate <Time>`|Formats provided time in UnixDate format|
|`RubyDate <Time>`|Formats provided time in RubyDate format|
|`RFC822Z <Time>`|Formats provided time in RFC822Z format|
|`RFC850 <Time>`|Formats provided time in RFC850 format|
|`RFC1123 <Time>`|Formats provided time in RFC1123 format|
|`RFC1123Z <Time>`|Formats provided time in RFC1123Z format|
|`RFC3339 <Time>`|Formats provided time in RFC3339 format|
|`RFC3339Nano <Time>`|Formats provided time in RFC3339Nano format|
|`FormatTime <Time> <string template>`|Formats provided time according to provided template|
|`InTZ <Time> <string timezone name>`|Converts provided time to parsed timezone from the provided name|
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
#### Running alerts on startup
It's not the best feeling to find out your alerts are broken when you're expecting to be alerted about another failure. To avoid this and provide early insight into broken alerts, it is possible to specify a list of alerts to run when Minitor starts up. This can be done using the command line flag `-startup-alerts`. This flag accepts a comma separated list of strings and will run a test of each of those alerts. Minitor will then respond as it typically does for any failed alert. This can be used to allow you time to correct when initially launching, and to allow schedulers to more easily detect a failed deployment of Minitor.
Eg.
```bash
minitor -startup-alerts=log_down,log_up -config ./config.hcl
```
### Metrics
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
It is also possible to use the metrics endpoint for monitoring Minitor itself! This allows setting up multiple instances of Minitor on different servers and have them monitor each-other so that you can detect a minitor outage.
To run minitor with metrics, use the `-metrics` flag. The metrics will be served on port `8080` by default, though it can be overridden using `-metrics-port`. They will be accessible on the path `/metrics`. Eg. `localhost:8080/metrics`.
```bash
minitor -metrics
# or
minitor -metrics -metrics-port 3000
```
## Migrating from v1 to v2
Minitor v2 introduces some breaking changes from v1. The most notable changes are:
- The configuration file is now in HCL format instead of YAML.
- The Python formatting backwards compatibility is removed.
- The Command and ShellCommand fields are now mutually exclusive.
- The check_interval is now strictly a duration string value. Eg. "30s" rather than `30`.
- Default alert_every is now -1 (exponential backoff) rather than 0 (no re-alerting).
For the configuration, a config that looked like this in v1:
```yaml
check_interval: 60
monitors:
- name: example
command: "false"
alert_down: ["log"]
alerts:
log:
command: ["echo", "Minitor up={{.IsUp}} for {{.MonitorName}}"]
```
Would now look like this in v2:
```hcl
check_interval = "1m"
monitor "example" {
# example showing string to shell command migration
shell_command = "false"
alert_down = ["log"]
check_interval = "1m"
}
alert "log" {
# example showing list to exec command migration
command = ["echo", "Minitor up={{.IsUp}} for {{.MonitorName}}"]
}
```
## Contributing
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
Github Mirror: https://github.com/IamTheFij/minitor.git
+110 -58
View File
@@ -2,72 +2,120 @@ package main
import ( import (
"bytes" "bytes"
"errors"
"fmt" "fmt"
"log"
"os/exec" "os/exec"
"strings"
"text/template" "text/template"
"time" "time"
"git.iamthefij.com/iamthefij/slog"
)
var (
errNoTemplate = errors.New("no template")
// ErrAlertFailed indicates that an alert failed to send
ErrAlertFailed = errors.New("alert failed")
) )
// Alert is a config driven mechanism for sending a notice // Alert is a config driven mechanism for sending a notice
type Alert struct { type Alert struct {
Name string Name string `hcl:"name,label"`
Command CommandOrShell Command []string `hcl:"command,optional"`
ShellCommand string `hcl:"shell_command,optional"`
commandTemplate []*template.Template commandTemplate []*template.Template
commandShellTemplate *template.Template commandShellTemplate *template.Template
} }
// AlertNotice captures the context for an alert to be sent // AlertNotice captures the context for an alert to be sent
type AlertNotice struct { type AlertNotice struct {
MonitorName string AlertCount int
AlertCount int16 FailureCount int
FailureCount int16
LastCheckOutput string
LastSuccess time.Time
IsUp bool IsUp bool
LastSuccess time.Time
MonitorName string
LastCheckOutput string
} }
// IsValid returns a boolean indicating if the Alert has been correctly // Validate checks that the Alert is properly configured and returns errors if not
// configured func (alert Alert) Validate() error {
func (alert Alert) IsValid() bool { hasCommand := len(alert.Command) > 0
return !alert.Command.Empty() hasShellCommand := alert.ShellCommand != ""
var err error
hasAtLeastOneCommand := hasCommand || hasShellCommand
if !hasAtLeastOneCommand {
err = errors.Join(err, fmt.Errorf(
"%w: alert %s has no command or shell_command configured",
ErrInvalidAlert,
alert.Name,
))
}
hasAtMostOneCommand := !(hasCommand && hasShellCommand)
if !hasAtMostOneCommand {
err = errors.Join(err, fmt.Errorf(
"%w: alert %s has both command and shell_command configured",
ErrInvalidAlert,
alert.Name,
))
}
return err
} }
// BuildTemplates compiles command templates for the Alert // BuildTemplates compiles command templates for the Alert
func (alert *Alert) BuildTemplates() error { func (alert *Alert) BuildTemplates() error {
// TODO: Remove legacy template support later after 1.0 slog.Debugf("Building template for alert %s", alert.Name)
legacy := strings.NewReplacer(
"{alert_count}", "{{.AlertCount}}", // Time format func factory
"{alert_message}", "{{.MonitorName}} check has failed {{.FailureCount}} times", tff := func(formatString string) func(time.Time) string {
"{failure_count}", "{{.FailureCount}}", return func(t time.Time) string {
"{last_output}", "{{.LastCheckOutput}}", return t.Format(formatString)
"{last_success}", "{{.LastSuccess}}",
"{monitor_name}", "{{.MonitorName}}",
)
if LogDebug {
log.Printf("DEBUG: Building template for alert %s", alert.Name)
} }
if alert.commandTemplate == nil && alert.Command.Command != nil { }
// Create some functions for formatting datetimes in popular formats
timeFormatFuncs := template.FuncMap{
"ANSIC": tff(time.ANSIC),
"UnixDate": tff(time.UnixDate),
"RubyDate": tff(time.RubyDate),
"RFC822Z": tff(time.RFC822Z),
"RFC850": tff(time.RFC850),
"RFC1123": tff(time.RFC1123),
"RFC1123Z": tff(time.RFC1123Z),
"RFC3339": tff(time.RFC3339),
"RFC3339Nano": tff(time.RFC3339Nano),
"FormatTime": func(t time.Time, timeFormat string) string {
return t.Format(timeFormat)
},
"InTZ": func(t time.Time, tzName string) (time.Time, error) {
tz, err := time.LoadLocation(tzName)
if err != nil {
return t, fmt.Errorf("failed to convert time to specified tz: %w", err)
}
return t.In(tz), nil
},
}
switch {
case alert.Command != nil:
alert.commandTemplate = []*template.Template{} alert.commandTemplate = []*template.Template{}
for i, cmdPart := range alert.Command.Command { for i, cmdPart := range alert.Command {
if PyCompat {
cmdPart = legacy.Replace(cmdPart)
}
alert.commandTemplate = append(alert.commandTemplate, template.Must( alert.commandTemplate = append(alert.commandTemplate, template.Must(
template.New(alert.Name+string(i)).Parse(cmdPart), template.New(alert.Name+fmt.Sprint(i)).Funcs(timeFormatFuncs).Parse(cmdPart),
)) ))
} }
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" { case alert.ShellCommand != "":
shellCmd := alert.Command.ShellCommand shellCmd := alert.ShellCommand
if PyCompat {
shellCmd = legacy.Replace(shellCmd)
}
alert.commandShellTemplate = template.Must( alert.commandShellTemplate = template.Must(
template.New(alert.Name).Parse(shellCmd), template.New(alert.Name).Funcs(timeFormatFuncs).Parse(shellCmd),
) )
} else { default:
return fmt.Errorf("No template provided for alert %s", alert.Name) return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
} }
return nil return nil
@@ -75,30 +123,40 @@ func (alert *Alert) BuildTemplates() error {
// Send will send an alert notice by executing the command template // Send will send an alert notice by executing the command template
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) { func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName) slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
var cmd *exec.Cmd var cmd *exec.Cmd
if alert.commandTemplate != nil {
switch {
case alert.commandTemplate != nil:
command := []string{} command := []string{}
for _, cmdTmp := range alert.commandTemplate { for _, cmdTmp := range alert.commandTemplate {
var commandBuffer bytes.Buffer var commandBuffer bytes.Buffer
err = cmdTmp.Execute(&commandBuffer, notice) err = cmdTmp.Execute(&commandBuffer, notice)
if err != nil { if err != nil {
return return
} }
command = append(command, commandBuffer.String()) command = append(command, commandBuffer.String())
} }
cmd = exec.Command(command[0], command[1:]...) cmd = exec.Command(command[0], command[1:]...)
} else if alert.commandShellTemplate != nil { case alert.commandShellTemplate != nil:
var commandBuffer bytes.Buffer var commandBuffer bytes.Buffer
err = alert.commandShellTemplate.Execute(&commandBuffer, notice) err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
if err != nil { if err != nil {
return return
} }
shellCommand := commandBuffer.String() shellCommand := commandBuffer.String()
cmd = ShellCommand(shellCommand) cmd = ShellCommand(shellCommand)
} else { default:
err = fmt.Errorf("No templates compiled for alert %v", alert.Name) err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
return return
} }
@@ -110,22 +168,16 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
var output []byte var output []byte
output, err = cmd.CombinedOutput() output, err = cmd.CombinedOutput()
outputStr = string(output) outputStr = string(output)
if LogDebug { slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
if err != nil {
err = fmt.Errorf(
"Alert %s failed to send. Returned %w: %w",
alert.Name,
err,
ErrAlertFailed,
)
} }
return outputStr, err return outputStr, err
} }
// NewLogAlert returns a new alert named "log" whose exec-form command is
// `echo` followed by a templated message containing the monitor name and its
// failure count.
func NewLogAlert() *Alert {
	logAlert := new(Alert)
	logAlert.Name = "log"
	logAlert.Command.Command = []string{
		"echo",
		"{{.MonitorName}} check has failed {{.FailureCount}} times",
	}

	return logAlert
}
+63 -56
View File
@@ -1,133 +1,140 @@
package main package main_test
import ( import (
"log" "errors"
"testing" "testing"
m "git.iamthefij.com/iamthefij/minitor-go/v2"
) )
func TestAlertIsValid(t *testing.T) { func TestAlertValidate(t *testing.T) {
t.Parallel()
cases := []struct { cases := []struct {
alert Alert alert m.Alert
expected bool expected error
name string name string
}{ }{
{Alert{Command: CommandOrShell{Command: []string{"echo", "test"}}}, true, "Command only"}, {m.Alert{Command: []string{"echo", "test"}}, nil, "Command only"},
{Alert{Command: CommandOrShell{ShellCommand: "echo test"}}, true, "CommandShell only"}, {m.Alert{ShellCommand: "echo test"}, nil, "CommandShell only"},
{Alert{}, false, "No commands"}, {m.Alert{Command: []string{"echo", "test"}, ShellCommand: "echo test"}, m.ErrInvalidAlert, "Both commands"},
{m.Alert{}, m.ErrInvalidAlert, "No commands"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
actual := c.alert.IsValid()
if actual != c.expected { t.Run(c.name, func(t *testing.T) {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual) t.Parallel()
log.Printf("Case failed: %s", c.name)
actual := c.alert.Validate()
hasErr := (actual != nil)
expectErr := (c.expected != nil)
if hasErr != expectErr || !errors.Is(actual, c.expected) {
t.Errorf("expected=%t actual=%t", c.expected, actual)
} }
log.Println("-----") })
} }
} }
func TestAlertSend(t *testing.T) { func TestAlertSend(t *testing.T) {
cases := []struct { cases := []struct {
alert Alert alert m.Alert
notice AlertNotice notice m.AlertNotice
expectedOutput string expectedOutput string
expectErr bool expectErr bool
name string name string
pyCompat bool
}{ }{
{ {
Alert{Command: CommandOrShell{Command: []string{"echo", "{{.MonitorName}}"}}}, m.Alert{Command: []string{"echo", "{{.MonitorName}}"}},
AlertNotice{MonitorName: "test"}, m.AlertNotice{MonitorName: "test"},
"test\n", "test\n",
false, false,
"Command with template", "Command with template",
false,
}, },
{ {
Alert{Command: CommandOrShell{ShellCommand: "echo {{.MonitorName}}"}}, m.Alert{ShellCommand: "echo {{.MonitorName}}"},
AlertNotice{MonitorName: "test"}, m.AlertNotice{MonitorName: "test"},
"test\n", "test\n",
false, false,
"Command shell with template", "Command shell with template",
false,
}, },
{ {
Alert{Command: CommandOrShell{Command: []string{"echo", "{{.Bad}}"}}}, m.Alert{Command: []string{"echo", "{{.Bad}}"}},
AlertNotice{MonitorName: "test"}, m.AlertNotice{MonitorName: "test"},
"", "",
true, true,
"Command with bad template", "Command with bad template",
false,
}, },
{ {
Alert{Command: CommandOrShell{ShellCommand: "echo {{.Bad}}"}}, m.Alert{ShellCommand: "echo {{.Bad}}"},
AlertNotice{MonitorName: "test"}, m.AlertNotice{MonitorName: "test"},
"", "",
true, true,
"Command shell with bad template", "Command shell with bad template",
false,
},
{
Alert{Command: CommandOrShell{ShellCommand: "echo {alert_message}"}},
AlertNotice{MonitorName: "test", FailureCount: 1},
"test check has failed 1 times\n",
false,
"Command shell with legacy template",
true,
}, },
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
// Set PyCompat to value of compat flag
PyCompat = c.pyCompat t.Run(c.name, func(t *testing.T) {
c.alert.BuildTemplates() t.Parallel()
err := c.alert.BuildTemplates()
if err != nil {
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
}
output, err := c.alert.Send(c.notice) output, err := c.alert.Send(c.notice)
hasErr := (err != nil) hasErr := (err != nil)
if output != c.expectedOutput { if output != c.expectedOutput {
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output) t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
log.Printf("Case failed: %s", c.name)
} }
if hasErr != c.expectErr { if hasErr != c.expectErr {
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err) t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
log.Printf("Case failed: %s", c.name)
} }
// Set PyCompat back to default value })
PyCompat = false
log.Println("-----")
} }
} }
func TestAlertSendNoTemplates(t *testing.T) { func TestAlertSendNoTemplates(t *testing.T) {
alert := Alert{} alert := m.Alert{}
notice := AlertNotice{} notice := m.AlertNotice{}
output, err := alert.Send(notice) output, err := alert.Send(notice)
if err == nil { if err == nil {
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output) t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
} }
log.Println("-----")
} }
func TestAlertBuildTemplate(t *testing.T) { func TestAlertBuildTemplate(t *testing.T) {
cases := []struct { cases := []struct {
alert Alert alert m.Alert
expectErr bool expectErr bool
name string name string
}{ }{
{Alert{Command: CommandOrShell{Command: []string{"echo", "test"}}}, false, "Command only"}, {m.Alert{Command: []string{"echo", "test"}}, false, "Command only"},
{Alert{Command: CommandOrShell{ShellCommand: "echo test"}}, false, "CommandShell only"}, {m.Alert{ShellCommand: "echo test"}, false, "CommandShell only"},
{Alert{}, true, "No commands"}, {m.Alert{}, true, "No commands"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.alert.BuildTemplates() err := c.alert.BuildTemplates()
hasErr := (err != nil) hasErr := (err != nil)
if hasErr != c.expectErr { if hasErr != c.expectErr {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err) t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----")
})
} }
} }
+124 -112
View File
@@ -2,98 +2,126 @@ package main
import ( import (
"errors" "errors"
"io/ioutil" "fmt"
"log" "time"
"gopkg.in/yaml.v2" "git.iamthefij.com/iamthefij/slog"
"github.com/hashicorp/hcl/v2/hclsimple"
)
var (
ErrLoadingConfig = errors.New("Failed to load or parse configuration")
ErrConfigInit = errors.New("Failed to initialize configuration")
ErrInvalidConfig = errors.New("Invalid configuration")
ErrNoAlerts = errors.New("No alerts provided")
ErrInvalidAlert = errors.New("Invalid alert configuration")
ErrNoMonitors = errors.New("No monitors provided")
ErrInvalidMonitor = errors.New("Invalid monitor configuration")
ErrUnknownAlert = errors.New("Unknown alert")
) )
// Config type is contains all provided user configuration // Config type is contains all provided user configuration
type Config struct { type Config struct {
CheckInterval int64 `yaml:"check_interval"` CheckIntervalStr string `hcl:"check_interval"`
Monitors []*Monitor CheckInterval time.Duration
Alerts map[string]*Alert
}
// CommandOrShell type wraps a string or list of strings DefaultAlertAfter int `hcl:"default_alert_after,optional"`
// for executing a command directly or in a shell DefaultAlertEvery *int `hcl:"default_alert_every,optional"`
type CommandOrShell struct { DefaultAlertDown []string `hcl:"default_alert_down,optional"`
ShellCommand string DefaultAlertUp []string `hcl:"default_alert_up,optional"`
Command []string Monitors []*Monitor `hcl:"monitor,block"`
} Alerts []*Alert `hcl:"alert,block"`
// Empty checks if the Command has a value alertLookup map[string]*Alert
func (cos CommandOrShell) Empty() bool {
	// A shell command alone is enough to make the value non-empty.
	if cos.ShellCommand != "" {
		return false
	}

	// Otherwise it is empty only when no exec-form command was set at all.
	return cos.Command == nil
}
// UnmarshalYAML decodes a YAML value that is either a list of strings or a
// single string. A list is stored as Command; a string is stored as
// ShellCommand. An error is returned only when the value decodes as neither.
func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) error {
	// First attempt: exec form, a list of strings.
	var parts []string
	if unmarshal(&parts) == nil {
		cos.Command = parts
		return nil
	}

	// The list decode failed, so fall back to treating the value as a shell string.
	var shell string
	if err := unmarshal(&shell); err != nil {
		return err
	}

	cos.ShellCommand = shell

	return nil
}
// IsValid checks config validity and returns true if valid
func (config Config) IsValid() (isValid bool) {
isValid = true
// Validate alerts
if config.Alerts == nil || len(config.Alerts) == 0 {
// This should never happen because there is a default alert named 'log' for now
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
isValid = false
}
for _, alert := range config.Alerts {
if !alert.IsValid() {
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
isValid = false
}
}
// Validate monitors
if config.Monitors == nil || len(config.Monitors) == 0 {
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
isValid = false
}
for _, monitor := range config.Monitors {
if !monitor.IsValid() {
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
isValid = false
}
// Check that all Monitor alerts actually exist
for _, isUp := range []bool{true, false} {
for _, alertName := range monitor.GetAlertNames(isUp) {
if _, ok := config.Alerts[alertName]; !ok {
log.Printf(
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
monitor.Name, alertName,
)
isValid = false
}
}
}
}
return
} }
// Init performs extra initialization on top of loading the config from file // Init performs extra initialization on top of loading the config from file
func (config *Config) Init() (err error) { func (config *Config) Init() (err error) {
for name, alert := range config.Alerts { config.CheckInterval, err = time.ParseDuration(config.CheckIntervalStr)
alert.Name = name if err != nil {
return fmt.Errorf("failed to parse top level check_interval duration: %w", err)
}
if config.DefaultAlertAfter == 0 {
minAlertAfter := 1
config.DefaultAlertAfter = minAlertAfter
}
if config.DefaultAlertEvery == nil {
defaultDefaultAlertEvery := -1
config.DefaultAlertEvery = &defaultDefaultAlertEvery
}
for _, monitor := range config.Monitors {
if err = monitor.Init(
config.DefaultAlertAfter,
config.DefaultAlertEvery,
config.DefaultAlertDown,
config.DefaultAlertUp,
); err != nil {
return
}
}
err = config.BuildAllTemplates()
return
}
// IsValid validates the whole configuration. It returns nil when no problem
// is found; otherwise it returns all problems joined into a single error, so
// callers can match individual causes with errors.Is.
func (config Config) IsValid() error {
	var problems error

	// note folds one more (possibly nil) error into the accumulated result.
	note := func(e error) {
		problems = errors.Join(problems, e)
	}

	// Alerts: at least one must be defined and each must validate.
	if len(config.Alerts) == 0 {
		note(ErrNoAlerts)
	}

	for _, alert := range config.Alerts {
		note(alert.Validate())
	}

	// Monitors: at least one must be defined and each must validate.
	if len(config.Monitors) == 0 {
		note(ErrNoMonitors)
	}

	for _, monitor := range config.Monitors {
		note(monitor.Validate())

		// Every alert a monitor names, for both the up and down states, must be defined.
		for _, isUp := range []bool{true, false} {
			for _, alertName := range monitor.GetAlertNames(isUp) {
				if _, known := config.GetAlert(alertName); known {
					continue
				}

				note(fmt.Errorf("%w: %s. %w: %s", ErrInvalidMonitor, monitor.Name, ErrUnknownAlert, alertName))
			}
		}
	}

	return problems
}
// GetAlert returns the alert configured under the given name and whether such
// an alert exists.
//
// If a lookup table is already present on the receiver it is used directly.
// Otherwise the alert list is searched. The method has a value receiver, so a
// table built here would be stored on a copy of the Config and thrown away
// when the call returns; building one would cost an allocation on every call
// with no caching benefit.
func (c Config) GetAlert(name string) (*Alert, bool) {
	if c.alertLookup != nil {
		v, ok := c.alertLookup[name]
		return v, ok
	}

	// Search from the end so that, when a name is defined more than once, the
	// last definition wins. This matches what filling a map in list order
	// would produce.
	for i := len(c.Alerts) - 1; i >= 0; i-- {
		alert := c.Alerts[i]
		if alert != nil && alert.Name == name {
			return alert, true
		}
	}

	return nil, false
}
// BuildAllTemplates builds all alert templates
func (c *Config) BuildAllTemplates() (err error) {
for _, alert := range c.Alerts {
if err = alert.BuildTemplates(); err != nil { if err = alert.BuildTemplates(); err != nil {
return return
} }
@@ -103,39 +131,23 @@ func (config *Config) Init() (err error) {
} }
// LoadConfig will read config from the given path and parse it // LoadConfig will read config from the given path and parse it
func LoadConfig(filePath string) (config Config, err error) { func LoadConfig(filePath string) (Config, error) {
data, err := ioutil.ReadFile(filePath) var config Config
if err != nil {
return if err := hclsimple.DecodeFile(filePath, nil, &config); err != nil {
return config, errors.Join(ErrLoadingConfig, err)
} }
err = yaml.Unmarshal(data, &config) slog.Debugf("Config values:\n%v\n", config)
if err != nil {
return
}
if LogDebug {
log.Printf("DEBUG: Config values:\n%v\n", config)
}
// Add log alert if not present
if PyCompat {
// Intialize alerts list if not present
if config.Alerts == nil {
config.Alerts = map[string]*Alert{}
}
if _, ok := config.Alerts["log"]; !ok {
config.Alerts["log"] = NewLogAlert()
}
}
if !config.IsValid() {
err = errors.New("Invalid configuration")
return
}
// Finish initializing configuration // Finish initializing configuration
err = config.Init() if err := config.Init(); err != nil {
return config, errors.Join(ErrConfigInit, err)
return }
if err := config.IsValid(); err != nil {
return config, errors.Join(ErrInvalidConfig, err)
}
return config, nil
} }
+149 -59
View File
@@ -1,103 +1,193 @@
package main package main_test
import ( import (
"log" "errors"
"testing" "testing"
"time"
m "git.iamthefij.com/iamthefij/minitor-go/v2"
) )
func TestLoadConfig(t *testing.T) { func TestLoadConfig(t *testing.T) {
cases := []struct { cases := []struct {
configPath string configPath string
expectErr bool expectedErr error
name string name string
pyCompat bool
}{ }{
{"./test/valid-config.yml", false, "Valid config file", false}, {"./test/does-not-exist", m.ErrLoadingConfig, "Invalid config path"},
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true}, {"./test/invalid-config-wrong-hcl-type.hcl", m.ErrLoadingConfig, "Incorrect HCL type"},
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false}, {"./test/invalid-config-missing-alerts.hcl", m.ErrNoAlerts, "Invalid config missing alerts"},
{"./test/does-not-exist", true, "Invalid config path", false}, {"./test/invalid-config-missing-alerts.hcl", m.ErrInvalidConfig, "Invalid config general"},
{"./test/invalid-config-type.yml", true, "Invalid config type for key", false}, {"./test/invalid-config-invalid-duration.hcl", m.ErrConfigInit, "Invalid config type for key"},
{"./test/invalid-config-missing-alerts.yml", true, "Invalid config missing alerts", false}, {"./test/invalid-config-unknown-alert.hcl", m.ErrUnknownAlert, "Invalid config unknown alert"},
{"./test/invalid-config-unknown-alert.yml", true, "Invalid config unknown alert", false}, {"./test/valid-config-default-values.hcl", nil, "Valid config file with default values"},
{"./test/valid-config.hcl", nil, "Valid config file"},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
_, err := m.LoadConfig(c.configPath)
hasErr := (err != nil)
expectErr := (c.expectedErr != nil)
if hasErr != expectErr || !errors.Is(err, c.expectedErr) {
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectedErr, err)
}
})
}
}
func TestDefaultConfig(t *testing.T) {
cases := []struct {
configPath string
expectedResult m.Config
name string
}{
{
"./test/valid-config-default-values.hcl",
m.Config{
CheckInterval: 1 * time.Second,
DefaultAlertAfter: 2,
DefaultAlertEvery: Ptr(0),
DefaultAlertDown: []string{"log_command"},
},
"override defaults",
},
{
"./test/valid-config.hcl",
m.Config{
CheckInterval: 30 * time.Second,
DefaultAlertAfter: 1,
DefaultAlertEvery: Ptr(-1),
DefaultAlertDown: []string{},
},
"default defaults",
},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
// Set PyCompat based on compatibility mode
PyCompat = c.pyCompat t.Run(c.name, func(t *testing.T) {
_, err := LoadConfig(c.configPath) t.Parallel()
hasErr := (err != nil)
if hasErr != c.expectErr { config, err := m.LoadConfig(c.configPath)
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err) if err != nil {
log.Printf("Case failed: %s", c.name) t.Errorf("Got error when loading config file %q: %s", c.configPath, err)
} }
// Set PyCompat to default value
PyCompat = false // Test Config has default values
log.Println("-----") if config.DefaultAlertAfter != c.expectedResult.DefaultAlertAfter {
t.Errorf("Got unexpected DefaultAlertAfter from file %q: expected=%v actual=%v", c.configPath, c.expectedResult.DefaultAlertAfter, config.DefaultAlertAfter)
}
if *config.DefaultAlertEvery != *c.expectedResult.DefaultAlertEvery {
t.Errorf("Got unexpected DefaultAlertEvery from file %q: expected=%v actual=%v", c.configPath, *c.expectedResult.DefaultAlertEvery, *config.DefaultAlertEvery)
}
if !m.EqualSliceString(config.DefaultAlertUp, c.expectedResult.DefaultAlertUp) {
t.Errorf("Got unexpected DefaultAlertUp from file %q: expected=%v actual=%v", c.configPath, c.expectedResult.DefaultAlertUp, config.DefaultAlertUp)
}
if !m.EqualSliceString(config.DefaultAlertDown, c.expectedResult.DefaultAlertDown) {
t.Errorf("Got unexpected DefaultAlertDown from file %q: expected=%v actual=%v", c.configPath, c.expectedResult.DefaultAlertDown, config.DefaultAlertDown)
}
// Check that monitor defaults propagate
var defaultMonitor *m.Monitor
for _, monitor := range config.Monitors {
if monitor.Name == "Default" {
defaultMonitor = monitor
}
}
if defaultMonitor == nil {
t.Errorf("failed to find default monitor in %q", c.configPath)
}
if defaultMonitor.AlertAfter != c.expectedResult.DefaultAlertAfter {
t.Errorf("Got unexpected AlertAfter from file %q: expected=%v actual=%v", c.configPath, c.expectedResult.DefaultAlertAfter, defaultMonitor.AlertAfter)
}
if *defaultMonitor.AlertEvery != *c.expectedResult.DefaultAlertEvery {
t.Errorf("Got unexpected AlertEvery from file %q: expected=%v actual=%v", c.configPath, *c.expectedResult.DefaultAlertEvery, *defaultMonitor.AlertEvery)
}
if !m.EqualSliceString(defaultMonitor.AlertUp, c.expectedResult.DefaultAlertUp) {
t.Errorf("Got unexpected AlertUp from file %q: expected=%v actual=%v", c.configPath, c.expectedResult.DefaultAlertUp, defaultMonitor.AlertUp)
}
// NOTE: Can't compare AlertDown because default is empty and that is invalid
})
} }
} }
// TestMultiLineConfig is a more complicated test stepping through the parsing // TestMultiLineConfig is a more complicated test stepping through the parsing
// and execution of multi-line strings presented in YAML // and execution of multi-line strings presented in YAML
func TestMultiLineConfig(t *testing.T) { func TestMultiLineConfig(t *testing.T) {
log.Println("Testing multi-line string config") t.Parallel()
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
config, err := m.LoadConfig("./test/valid-verify-multi-line.hcl")
if err != nil { if err != nil {
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err) t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
} }
log.Println("-----") t.Run("Test Monitor with Indented Multi-Line String", func(t *testing.T) {
log.Println("TestMultiLineConfig(parse > string)") // Verify indented heredoc is as expected
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n" expected := "echo 'Some string with stuff'\necho \"<angle brackets>\"\nexit 1\n"
actual := config.Monitors[0].Command.ShellCommand actual := config.Monitors[0].ShellCommand
if expected != actual { if expected != actual {
t.Errorf("TestMultiLineConfig(>) failed") t.Error("Heredoc mismatch")
t.Logf("string expected=`%v`", expected) t.Errorf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual) t.Errorf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
} }
log.Println("-----") // Run the monitor and verify the output
log.Println("TestMultiLineConfig(execute > string)")
_, notice := config.Monitors[0].Check() _, notice := config.Monitors[0].Check()
if notice == nil { if notice == nil {
t.Fatalf("Did not receive an alert notice") t.Fatal("Did not receive an alert notice and should have")
} }
// Verify the output of the monitor is as expected
expected = "Some string with stuff\n<angle brackets>\n" expected = "Some string with stuff\n<angle brackets>\n"
actual = notice.LastCheckOutput actual = notice.LastCheckOutput
if expected != actual { if expected != actual {
t.Errorf("TestMultiLineConfig(execute > string) check failed") t.Error("Output mismatch")
t.Logf("string expected=`%v`", expected) t.Errorf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual) t.Errorf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected)) }
t.Logf("bytes actual =%v", []byte(actual)) })
t.Run("Test Alert with Multi-Line String", func(t *testing.T) {
alert, ok := config.GetAlert("log_shell")
if !ok {
t.Fatal("Could not find expected alert 'log_shell'")
} }
log.Println("-----") expected := " echo 'Some string with stuff'\n echo '<angle brackets>'\n"
log.Println("TestMultiLineConfig(parse | string)") actual := alert.ShellCommand
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
actual = config.Alerts["log_shell"].Command.ShellCommand
if expected != actual { if expected != actual {
t.Errorf("TestMultiLineConfig(|) failed") t.Error("Heredoc mismatch")
t.Logf("string expected=`%v`", expected) t.Errorf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual) t.Errorf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
} }
log.Println("-----") actual, err = alert.Send(m.AlertNotice{})
log.Println("TestMultiLineConfig(execute | string)")
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
if err != nil { if err != nil {
t.Errorf("Execution of alert failed") t.Fatal("Execution of alert failed")
} }
expected = "Some string with stuff\n<angle brackets>\n" expected = "Some string with stuff\n<angle brackets>\n"
if expected != actual { if expected != actual {
t.Errorf("TestMultiLineConfig(execute | string) check failed") t.Error("Output mismatch")
t.Logf("string expected=`%v`", expected) t.Errorf("string expected=`%v`", expected)
t.Logf("string actual =`%v`", actual) t.Errorf("string actual =`%v`", actual)
t.Logf("bytes expected=%v", []byte(expected))
t.Logf("bytes actual =%v", []byte(actual))
} }
})
} }
+29
View File
@@ -0,0 +1,29 @@
# Top-level check interval. The monitor below declares its own
# check_interval of "30s".
# NOTE(review): presumably this is how often the main loop wakes up to see
# which monitors are due — confirm against the config loader.
check_interval = "1s"
# Monitor "mdstat_raid": runs a shell check against /host_proc/mdstat.
# NOTE(review): /host_proc is presumably the host's /proc mounted into the
# container — confirm against the deployment.
monitor "mdstat_raid" {
# After HCL unescaping, the grep pattern is \[U_\|_U\], i.e. the literal text
# "[U_" or "_U]". The command exits 1 when grep finds a match and exits 0
# otherwise — including when grep itself fails (for example, if the file
# cannot be read).
command = [
"sh",
"-c",
"grep -q '\\[U_\\|_U\\]' /host_proc/mdstat && exit 1 || exit 0"
]
check_interval = "30s"
alert_after = 1
# Per the commit history, alert_every is set to 0 to prevent repeated alerts.
alert_every = 0
# These names must match alert blocks defined in this file.
alert_down = ["email_alert"]
alert_up = ["email_recovery"]
}
# Alert "email_alert": listed in mdstat_raid's alert_down.
# Builds a short message with printf (Subject, To, and a one-line body) and
# pipes it to /usr/local/bin/sendmail -t. The recipient is taken from the
# shell variable $EMAIL_RECIPIENT when the command runs.
# The trailing "|| true" makes the command exit successfully even when
# sendmail fails.
# NOTE(review): presumably so a mail failure is not treated as a failed
# alert — confirm this is intended.
alert "email_alert" {
command = [
"sh",
"-c",
"EMAIL=$EMAIL_RECIPIENT; printf 'Subject: RAID ALERT\nTo: %s\n\nRAID degraded\n' \"$EMAIL\" | /usr/local/bin/sendmail -t || true"
]
}
# Alert "email_recovery": listed in mdstat_raid's alert_up.
# Same shape as the down alert: printf builds the message and pipes it to
# /usr/local/bin/sendmail -t, addressed to $EMAIL_RECIPIENT, and "|| true"
# makes the command exit successfully even when sendmail fails.
# NOTE(review): the subject is "RAID ALERT", identical to the down alert,
# while the body says "RAID clean" — confirm the shared subject is intended.
alert "email_recovery" {
command = [
"sh",
"-c",
"EMAIL=$EMAIL_RECIPIENT; printf 'Subject: RAID ALERT\nTo: %s\n\nRAID clean\n' \"$EMAIL\" | /usr/local/bin/sendmail -t || true"
]
}
+23 -4
View File
@@ -1,8 +1,27 @@
module git.iamthefij.com/iamthefij/minitor-go module git.iamthefij.com/iamthefij/minitor-go/v2
go 1.12 go 1.25.0
require ( require (
github.com/prometheus/client_golang v1.2.1 git.iamthefij.com/iamthefij/slog v1.3.0
gopkg.in/yaml.v2 v2.2.4 github.com/hashicorp/hcl/v2 v2.11.1
github.com/prometheus/client_golang v1.23.2
)
require (
github.com/agext/levenshtein v1.2.1 // indirect
github.com/apparentlymart/go-textseg/v13 v13.0.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/procfs v0.16.1 // indirect
github.com/zclconf/go-cty v1.8.0 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/sys v0.35.0 // indirect
golang.org/x/text v0.28.0 // indirect
google.golang.org/protobuf v1.36.8 // indirect
) )
+86 -68
View File
@@ -1,78 +1,96 @@
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/agext/levenshtein v1.2.1 h1:QmvMAjj2aEICytGiWzmxoE0x2KZvE0fvmqMOfy2tjT8=
github.com/alecthomas/units v0.0.0-20190717042225-c3de453c63f4/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/agext/levenshtein v1.2.1/go.mod h1:JEDfjyjHDjOF/1e4FlBE/PkbqA9OfWu2ki2W0IB5558=
github.com/beorn7/perks v0.0.0-20180321164747-3a771d992973/go.mod h1:Dwedo/Wpr24TaqPxmxbtue+5NUziq4I4S80YR8gNf3Q= github.com/apparentlymart/go-dump v0.0.0-20180507223929-23540a00eaa3/go.mod h1:oL81AME2rN47vu18xqj1S1jPIPuN7afo62yKTNn3XMM=
github.com/beorn7/perks v1.0.0/go.mod h1:KWe93zE9D1o94FZ5RNwFwVgaQK1VOXiVxmqh+CedLV8= github.com/apparentlymart/go-textseg v1.0.0/go.mod h1:z96Txxhf3xSFMPmb5X/1W05FF/Nj9VFpLOpjS5yuumk=
github.com/apparentlymart/go-textseg/v13 v13.0.0 h1:Y+KvPE1NYz0xl601PVImeQfFyEy6iT90AvPUL1NNfNw=
github.com/apparentlymart/go-textseg/v13 v13.0.0/go.mod h1:ZK2fH7c4NqDTLtiYLvIkEghdlcqw7yxLeM89kiTRPUo=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cespare/xxhash/v2 v2.1.0 h1:yTUvW7Vhb89inJ+8irsUqiWjh8iT6sQPZiQzI6ReGkA= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.1.0/go.mod h1:dgIUBU3pDso/gPgZ1osOZ0iQf77oPR28Tjxl5dIMyVM= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-test/deep v1.0.3 h1:ZrJSEWsXzPOxaZnFteGEfooLba+ju3FYIbOrS+rQd68=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as= github.com/go-test/deep v1.0.3/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
github.com/go-logfmt/logfmt v0.3.0/go.mod h1:Qt1PoO58o5twSAckw1HlFXLmHsOX5/0LbT9GBnD5lWE= github.com/golang/protobuf v1.1.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/go-logfmt/logfmt v0.4.0/go.mod h1:3RMwSq7FuexP4Kalkev3ejPJsZTpXXBr9+V4qmtdjCk=
github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY=
github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
github.com/golang/protobuf v1.3.2 h1:6nsPYzhq5kReh6QImI3k5qWzO4PEbvbIW2cwSfR/6xs= github.com/golang/protobuf v1.3.4/go.mod h1:vzj43D7+SQXF/4pzW/hwtAqwc6iTitCiVSaWz5lYuqw=
github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU=
github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU= github.com/hashicorp/hcl/v2 v2.11.1 h1:yTyWcXcm9XB0TEkyU/JCRU6rYy4K+mgLtzn2wlrJbcc=
github.com/json-iterator/go v1.1.7/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4= github.com/hashicorp/hcl/v2 v2.11.1/go.mod h1:FwWsfWEjyV/CMj8s/gqAuiviY72rJ1/oayI9WftqcKg=
github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/logfmt v0.0.0-20140226030751-b84e30acd515/go.mod h1:+0opPa2QZZtGFBFZlji/RkVcI2GknAs/DXo4wKdlNEc= github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ=
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE=
github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI=
github.com/modern-go/reflect2 v1.0.1/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/kylelemons/godebug v0.0.0-20170820004349-d65d576e9348/go.mod h1:B69LEHPfb2qLo0BaaOLcbitczOKLWTsrBG9LczfCD4k=
github.com/mwitkow/go-conntrack v0.0.0-20161129095857-cc309e4a2223/go.mod h1:qRWi+5nqEBWmkhHvq77mSJWrCKwh8bxhgT7d/eI7P4U= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/pkg/errors v0.8.0/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7 h1:DpOJ2HYzCv8LZP15IdmG+YdwD2luVPHITV96TkirNBM=
github.com/mitchellh/go-wordwrap v0.0.0-20150314170334-ad45545899c7/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v0.9.1/go.mod h1:7SWBe2y4D6OKWSNQJUaRYU/AaXPKyh/dDVn+NZz0KFw= github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.0.0/go.mod h1:db9x61etRT2tGnBNRi70OPL5FsnadC4Ky3P0J6CfImo= github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_golang v1.2.1 h1:JnMpQc6ppsNgw9QPAGF6Dod479itz7lvlsMzzNayLOI= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_golang v1.2.1/go.mod h1:XMU6Z2MjaRKVu/dC1qupJI9SiNkDYzz3xecMgSW/F+U= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4 h1:gQz4mCbXsO+nc9n1hCxHcGA3Zx3Eo+UHZoInFGUIXNM= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
github.com/prometheus/common v0.7.0 h1:L+1lyG48J1zAQXA3RBX/nG/B3gjlHq0zTt2tlbJLyCY= github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog=
github.com/prometheus/common v0.7.0/go.mod h1:DjGbpBbp5NYNiECxcL/VnbXCCaQpKd3tt26CguLLsqA= github.com/sergi/go-diff v1.0.0 h1:Kpca3qRNrduNnOQeazBd0ysaKrUJiIuISHxogkT9RPQ=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/spf13/pflag v1.0.2/go.mod h1:DYY7MBk1bdzusC3SYhjObp+wFpr4gzcvqqNjLnInEg4=
github.com/prometheus/procfs v0.0.5 h1:3+auTFlqw+ZaQYJARz6ArODtkaIwtvBTx3N2NehQlL8=
github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4= github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/vmihailenco/msgpack/v4 v4.3.12/go.mod h1:gborTTJjAo/GWTqqRjrLCn9pgNN+NXzzngzBKDPIqw4=
github.com/vmihailenco/tagparser v0.1.1/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI=
github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8=
github.com/zclconf/go-cty v1.8.0 h1:s4AvqaeQzJIu3ndv4gVIhplVD0krU+bgrcLSVUnaWuA=
github.com/zclconf/go-cty v1.8.0/go.mod h1:vVKLxnk3puL4qRAv72AO+W99LUD4da90g3uUAzyuvAk=
github.com/zclconf/go-cty-debug v0.0.0-20191215020915-b22d67c1ba0b/go.mod h1:ZRKQfBXbGkpdV6QMzT3rU1kSTAnfu1dO8dPKjYprgj8=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/net v0.0.0-20181114220301-adae6a3d119a/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/net v0.0.0-20190613194153-d28f0bde5980/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= golang.org/x/net v0.0.0-20180811021610-c39426892332/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/net v0.0.0-20200301022130-244492dfa37a/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190502175342-a43fa875dd82/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM=
google.golang.org/appengine v1.6.5/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.1/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/yaml.v2 v2.2.4 h1:/eiJrUcujPVeJ3xlSWaiNi3uSVmDGBK1pDHUHAnao1I= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
+93 -52
View File
@@ -1,16 +1,16 @@
package main package main
import ( import (
"errors"
"flag" "flag"
"fmt" "fmt"
"log" "strings"
"time" "time"
"git.iamthefij.com/iamthefij/slog"
) )
var ( var (
// LogDebug will control whether debug messsages should be logged
LogDebug = false
// ExportMetrics will track whether or not we want to export metrics to prometheus // ExportMetrics will track whether or not we want to export metrics to prometheus
ExportMetrics = false ExportMetrics = false
// MetricsPort is the port to expose metrics on // MetricsPort is the port to expose metrics on
@@ -18,63 +18,70 @@ var (
// Metrics contains all active metrics // Metrics contains all active metrics
Metrics = NewMetrics() Metrics = NewMetrics()
// PyCompat enables support for legacy Python templates
PyCompat = false
// version of minitor being run // version of minitor being run
version = "dev" version = "dev"
errUnknownAlert = errors.New("unknown alert")
) )
func checkMonitors(config *Config) error { func SendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
for _, monitor := range config.Monitors { slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, success)
Metrics.CountCheck(monitor.Name, success, hasAlert)
// Should probably consider refactoring everything below here
if alertNotice != nil {
if LogDebug {
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
}
alertNames := monitor.GetAlertNames(alertNotice.IsUp) alertNames := monitor.GetAlertNames(alertNotice.IsUp)
if alertNames == nil { if alertNames == nil {
// This should only happen for a recovery alert. AlertDown is validated not empty // This should only happen for a recovery alert. AlertDown is validated not empty
log.Printf( slog.Warningf(
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t", "Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
alertNotice.MonitorName, alertNotice.IsUp, alertNotice.MonitorName, alertNotice.IsUp,
) )
return nil
} }
for _, alertName := range alertNames { for _, alertName := range alertNames {
if alert, ok := config.Alerts[alertName]; ok { if alert, ok := config.GetAlert(alertName); ok {
output, err := alert.Send(*alertNotice) output, err := alert.Send(*alertNotice)
if err != nil { if err != nil {
log.Printf( slog.Errorf(
"ERROR: Alert '%s' failed. result=%v: output=%s", "Alert '%s' failed. result=%v: output=%s",
alert.Name, alert.Name,
err, err,
output, output,
) )
return fmt.Errorf(
"Unsuccessfully triggered alert '%s'. "+ return err
"Crashing to avoid false negatives: %v",
alert.Name,
err,
)
} }
// Count alert metrics // Count alert metrics
Metrics.CountAlert(monitor.Name, alert.Name) Metrics.CountAlert(monitor.Name, alert.Name)
} else { } else {
// This case should never actually happen since we validate against it // This case should never actually happen since we validate against it
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName) slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
} }
} }
return nil
}
func CheckMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckMilliseconds(), hasAlert)
if alertNotice != nil {
err := SendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, exit early and bubble it up
if err != nil {
return err
}
} }
} }
} }
@@ -82,41 +89,75 @@ func checkMonitors(config *Config) error {
return nil return nil
} }
// SendStartupAlerts sends a test notice through each of the named alerts so
// that a broken alert mechanism is discovered at startup.
//
// It stops at the first alert that is not known to config, or whose Send call
// fails, and returns that error. It returns nil once every named alert has
// been sent.
func SendStartupAlerts(config *Config, alertNames []string) error {
	for _, name := range alertNames {
		alert, found := config.GetAlert(name)
		if !found {
			return fmt.Errorf("unknown alert %s: %w", name, errUnknownAlert)
		}

		// The test notice describes a healthy monitor with no failures or prior alerts.
		notice := AlertNotice{
			AlertCount:      0,
			FailureCount:    0,
			IsUp:            true,
			LastSuccess:     time.Now(),
			MonitorName:     fmt.Sprintf("First Run Alert Test: %s", alert.Name),
			LastCheckOutput: "",
		}

		if _, err := alert.Send(notice); err != nil {
			return err
		}
	}

	return nil
}
func main() { func main() {
// Get debug flag showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)") configPath := flag.String("config", "config.hcl", "Alternate configuration path (default: config.hcl)")
startupAlerts := flag.String("startup-alerts", "", "List of alerts to run on startup. This can help determine unhealthy alerts early on. (default \"\")")
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)") flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)") flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
var configPath = flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
flag.Parse() flag.Parse()
// Print version if flag is provided // Print version if flag is provided
if *showVersion { if *showVersion {
log.Println("Minitor version:", version) fmt.Println("Minitor version:", version)
return return
} }
// Load configuration // Load configuration
config, err := LoadConfig(*configPath) config, err := LoadConfig(*configPath)
if err != nil { slog.OnErrFatalf(err, "Error loading config")
log.Fatalf("Error loading config: %v", err)
}
// Serve metrics exporter, if specified // Serve metrics exporter, if specified
if ExportMetrics { if ExportMetrics {
log.Println("INFO: Exporting metrics to Prometheus") slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
go ServeMetrics() go ServeMetrics()
} }
if *startupAlerts != "" {
alertNames := strings.Split(*startupAlerts, ",")
err = SendStartupAlerts(&config, alertNames)
slog.OnErrPanicf(err, "Error running startup alerts")
}
// Start main loop // Start main loop
for { for {
err = checkMonitors(&config) err = CheckMonitors(&config)
if err != nil { slog.OnErrPanicf(err, "Error checking monitors")
panic(err)
}
sleepTime := time.Duration(config.CheckInterval) * time.Second time.Sleep(config.CheckInterval)
time.Sleep(sleepTime)
} }
} }
+148 -70
View File
@@ -1,114 +1,192 @@
package main package main_test
import "testing" import (
"testing"
m "git.iamthefij.com/iamthefij/minitor-go/v2"
)
// Ptr returns a pointer to a newly allocated copy of v. It lets tests take the
// address of a literal when populating optional (pointer) config fields.
func Ptr[T any](v T) *T {
	ptr := new(T)
	*ptr = v

	return ptr
}
// TestCheckMonitors tests the CheckMonitors function.
// It also tests results for potentially invalid configuration, for example no alerts or unknown alerts.
func TestCheckMonitors(t *testing.T) { func TestCheckMonitors(t *testing.T) {
cases := []struct { cases := []struct {
config Config config m.Config
expectErr bool expectFailureError bool
expectRecoverError bool
name string name string
}{ }{
{ {
config: Config{}, config: m.Config{
expectErr: false, CheckIntervalStr: "1s",
name: "Empty", Monitors: []*m.Monitor{
},
{ {
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Success", Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
}, },
}, },
}, },
expectErr: false, expectFailureError: false,
name: "Monitor success, no alerts", expectRecoverError: false,
name: "No alerts",
}, },
{ {
config: Config{ config: m.Config{
Monitors: []*Monitor{ CheckIntervalStr: "1s",
&Monitor{ Monitors: []*m.Monitor{
{
Name: "Failure", Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertAfter: 1,
},
&Monitor{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"unknown"}, AlertDown: []string{"unknown"},
AlertAfter: 1,
},
},
},
expectErr: false,
name: "Monitor failure, no and unknown alerts",
},
{
config: Config{
Monitors: []*Monitor{
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"ls"}},
alertCount: 1,
},
&Monitor{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"}, AlertUp: []string{"unknown"},
alertCount: 1, AlertAfter: 1,
}, },
}, },
}, },
expectErr: false, expectFailureError: true,
name: "Monitor recovery, no alerts", expectRecoverError: true,
name: "Unknown alerts",
}, },
{ {
config: Config{ config: m.Config{
Monitors: []*Monitor{ CheckIntervalStr: "1s",
&Monitor{ Monitors: []*m.Monitor{
{
Name: "Failure", Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"good"}, AlertDown: []string{"good"},
AlertUp: []string{"good"},
AlertAfter: 1, AlertAfter: 1,
}, },
}, },
Alerts: map[string]*Alert{ Alerts: []*m.Alert{{
"good": &Alert{ Name: "good",
Command: CommandOrShell{Command: []string{"true"}}, Command: []string{"true"},
}},
}, },
}, expectFailureError: false,
}, expectRecoverError: false,
expectErr: false, name: "Successful alert",
name: "Monitor failure, successful alert",
}, },
{ {
config: Config{ config: m.Config{
Monitors: []*Monitor{ CheckIntervalStr: "1s",
&Monitor{ Monitors: []*m.Monitor{
{
Name: "Failure", Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"}, AlertDown: []string{"bad"},
AlertUp: []string{"bad"},
AlertAfter: 1, AlertAfter: 1,
}, },
}, },
Alerts: map[string]*Alert{ Alerts: []*m.Alert{{
"bad": &Alert{
Name: "bad", Name: "bad",
Command: CommandOrShell{Command: []string{"false"}}, Command: []string{"false"},
}},
}, },
}, expectFailureError: true,
}, expectRecoverError: true,
expectErr: true, name: "Failing alert",
name: "Monitor failure, bad alert",
}, },
} }
for _, c := range cases { for _, c := range cases {
c.config.Init() c := c
err := checkMonitors(&c.config)
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.config.Init()
if err != nil {
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
}
for _, check := range []struct {
shellCmd string
name string
expectErr bool
}{
{"false", "Failure", c.expectFailureError}, {"true", "Success", c.expectRecoverError},
} {
// Set the shell command for this check
c.config.Monitors[0].ShellCommand = check.shellCmd
// Run the check
err = m.CheckMonitors(&c.config)
// Check the results
if err == nil && check.expectErr {
t.Errorf("checkMonitors(%s:%s): Expected error, the code did not error", c.name, check.name)
} else if err != nil && !check.expectErr {
t.Errorf("checkMonitors(%s:%s): Did not expect an error, but we got one anyway: %v", c.name, check.name, err)
}
}
})
}
}
func TestFirstRunAlerts(t *testing.T) {
cases := []struct {
config m.Config
expectErr bool
startupAlerts []string
name string
}{
{
config: m.Config{
CheckIntervalStr: "1s",
},
expectErr: true,
startupAlerts: []string{"missing"},
name: "Unknown",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Alerts: []*m.Alert{
{
Name: "good",
Command: []string{"true"},
},
},
},
expectErr: false,
startupAlerts: []string{"good"},
name: "Successful alert",
},
{
config: m.Config{
CheckIntervalStr: "1s",
Alerts: []*m.Alert{
{
Name: "bad",
Command: []string{"false"},
},
},
},
expectErr: true,
startupAlerts: []string{"bad"},
name: "Failed alert",
},
}
for _, c := range cases {
c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
err := c.config.Init()
if err != nil {
t.Errorf("sendFirstRunAlerts(%s): unexpected error reading config: %v", c.name, err)
}
err = m.SendStartupAlerts(&c.config, c.startupAlerts)
if err == nil && c.expectErr { if err == nil && c.expectErr {
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name) t.Errorf("sendFirstRunAlerts(%s): Expected error, the code did not error", c.name)
} } else if err != nil && !c.expectErr {
t.Errorf("sendFirstRunAlerts(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
}
})
} }
} }
-25
View File
@@ -1,25 +0,0 @@
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}latest{{/if}}
{{#if build.tags}}
tags:
{{#each build.tags}}
- {{this}}
{{/each}}
{{/if}}
manifests:
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-amd64
platform:
architecture: amd64
os: linux
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm64
platform:
architecture: arm64
os: linux
variant: v8
-
image: iamthefij/minitor-go:{{#if build.tag}}{{trimPrefix "v" build.tag}}-{{/if}}linux-arm
platform:
architecture: arm
os: linux
variant: v7
+17 -1
View File
@@ -19,6 +19,7 @@ import (
type MinitorMetrics struct { type MinitorMetrics struct {
alertCount *prometheus.CounterVec alertCount *prometheus.CounterVec
checkCount *prometheus.CounterVec checkCount *prometheus.CounterVec
checkTime *prometheus.GaugeVec
monitorStatus *prometheus.GaugeVec monitorStatus *prometheus.GaugeVec
} }
@@ -40,6 +41,13 @@ func NewMetrics() *MinitorMetrics {
}, },
[]string{"monitor", "status", "is_alert"}, []string{"monitor", "status", "is_alert"},
), ),
checkTime: prometheus.NewGaugeVec(
prometheus.GaugeOpts{
Name: "minitor_check_milliseconds",
Help: "Time in miliseconds that a check ran for",
},
[]string{"monitor", "status"},
),
monitorStatus: prometheus.NewGaugeVec( monitorStatus: prometheus.NewGaugeVec(
prometheus.GaugeOpts{ prometheus.GaugeOpts{
Name: "minitor_monitor_up_count", Name: "minitor_monitor_up_count",
@@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
// Register newly created metrics // Register newly created metrics
prometheus.MustRegister(metrics.alertCount) prometheus.MustRegister(metrics.alertCount)
prometheus.MustRegister(metrics.checkCount) prometheus.MustRegister(metrics.checkCount)
prometheus.MustRegister(metrics.checkTime)
prometheus.MustRegister(metrics.monitorStatus) prometheus.MustRegister(metrics.monitorStatus)
return metrics return metrics
@@ -63,11 +72,12 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
if isUp { if isUp {
val = 1.0 val = 1.0
} }
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val) metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
} }
// CountCheck counts the result of a particular Monitor check // CountCheck counts the result of a particular Monitor check
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) { func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, ms int64, isAlert bool) {
status := "failure" status := "failure"
if isSuccess { if isSuccess {
status = "success" status = "success"
@@ -81,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
metrics.checkCount.With( metrics.checkCount.With(
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal}, prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
).Inc() ).Inc()
metrics.checkTime.With(
prometheus.Labels{"monitor": monitor, "status": status},
).Set(float64(ms))
} }
// CountAlert counts an alert // CountAlert counts an alert
@@ -96,6 +110,8 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
// ServeMetrics starts an http server with a Prometheus metrics handler // ServeMetrics starts an http server with a Prometheus metrics handler
func ServeMetrics() { func ServeMetrics() {
http.Handle("/metrics", promhttp.Handler()) http.Handle("/metrics", promhttp.Handler())
host := fmt.Sprintf(":%d", MetricsPort) host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil) _ = http.ListenAndServe(host, nil)
} }
+162 -80
View File
@@ -1,82 +1,164 @@
package main package main
import ( import (
"log" "errors"
"fmt"
"math" "math"
"os/exec" "os/exec"
"time" "time"
"git.iamthefij.com/iamthefij/slog"
) )
// Monitor represents a particular periodic check of a command // Monitor represents a particular periodic check of a command
type Monitor struct { type Monitor struct { //nolint:maligned
// Config values // Config values
Name string CheckIntervalStr *string `hcl:"check_interval,optional"`
Command CommandOrShell CheckInterval time.Duration
AlertDown []string `yaml:"alert_down"`
AlertUp []string `yaml:"alert_up"` Name string `hcl:"name,label"`
CheckInterval float64 `yaml:"check_interval"` AlertCount int
AlertAfter int16 `yaml:"alert_after"` AlertAfter int `hcl:"alert_after,optional"`
AlertEvery int16 `yaml:"alert_every"` AlertEvery *int `hcl:"alert_every,optional"`
AlertDown []string `hcl:"alert_down,optional"`
AlertUp []string `hcl:"alert_up,optional"`
Command []string `hcl:"command,optional"`
ShellCommand string `hcl:"shell_command,optional"`
// Other values // Other values
failureCount int
lastCheck time.Time lastCheck time.Time
lastOutput string
alertCount int16
failureCount int16
lastSuccess time.Time lastSuccess time.Time
lastOutput string
lastCheckDuration time.Duration
} }
// IsValid returns a boolean indicating if the Monitor has been correctly // Init initializes the Monitor with default values
// configured func (monitor *Monitor) Init(defaultAlertAfter int, defaultAlertEvery *int, defaultAlertDown []string, defaultAlertUp []string) error {
func (monitor Monitor) IsValid() bool { // Parse the check_interval string into a time.Duration
return (!monitor.Command.Empty() && if monitor.CheckIntervalStr != nil {
monitor.getAlertAfter() > 0 && var err error
monitor.AlertDown != nil)
monitor.CheckInterval, err = time.ParseDuration(*monitor.CheckIntervalStr)
if err != nil {
return fmt.Errorf("failed to parse check_interval duration for monitor %s: %w", monitor.Name, err)
}
} }
// ShouldCheck returns a boolean indicating if the Monitor is ready to be // Set default values for monitor alerts
// be checked again if monitor.AlertAfter == 0 {
minAlertAfter := 1
monitor.AlertAfter = max(defaultAlertAfter, minAlertAfter)
}
if monitor.AlertEvery == nil {
monitor.AlertEvery = defaultAlertEvery
}
if len(monitor.AlertDown) == 0 {
monitor.AlertDown = defaultAlertDown
}
if len(monitor.AlertUp) == 0 {
monitor.AlertUp = defaultAlertUp
}
return nil
}
// Validate reports every configuration problem found on the Monitor. Each
// problem wraps ErrInvalidMonitor and the problems are combined with
// errors.Join, so a nil result means no problem was found.
func (monitor Monitor) Validate() error {
	var err error

	usesCommand := len(monitor.Command) != 0
	usesShell := monitor.ShellCommand != ""

	// Exactly one of command and shell_command must be configured; the two
	// failure modes (neither, both) cannot occur together.
	switch {
	case !usesCommand && !usesShell:
		err = errors.Join(err, fmt.Errorf(
			"%w: monitor %s has no command or shell_command configured",
			ErrInvalidMonitor,
			monitor.Name,
		))
	case usesCommand && usesShell:
		err = errors.Join(err, fmt.Errorf(
			"%w: monitor %s has both command and shell_command configured",
			ErrInvalidMonitor,
			monitor.Name,
		))
	}

	if monitor.AlertAfter <= 0 {
		err = errors.Join(err, fmt.Errorf(
			"%w: monitor %s has invalid alert_after value %d. Must be greater than 0",
			ErrInvalidMonitor,
			monitor.Name,
			monitor.AlertAfter,
		))
	}

	if len(monitor.AlertDown) == 0 {
		err = errors.Join(err, fmt.Errorf(
			"%w: monitor %s has no alert_down configured. Configure one here or add a default_alert_down",
			ErrInvalidMonitor,
			monitor.Name,
		))
	}

	return err
}
// LastOutput returns the output recorded by the monitor's most recent check.
func (monitor Monitor) LastOutput() string {
return monitor.lastOutput
}
// ShouldCheck returns a boolean indicating if the Monitor is ready to be checked again
func (monitor Monitor) ShouldCheck() bool { func (monitor Monitor) ShouldCheck() bool {
if monitor.lastCheck.IsZero() { if monitor.lastCheck.IsZero() || monitor.CheckInterval == 0 {
return true return true
} }
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds() sinceLastCheck := time.Since(monitor.lastCheck)
return sinceLastCheck >= monitor.CheckInterval return sinceLastCheck >= monitor.CheckInterval
} }
// Check will run the command configured by the Monitor and return a status // Check will run the command configured by the Monitor and return a status and a possible AlertNotice
// and a possible AlertNotice
func (monitor *Monitor) Check() (bool, *AlertNotice) { func (monitor *Monitor) Check() (bool, *AlertNotice) {
var cmd *exec.Cmd var cmd *exec.Cmd
if monitor.Command.Command != nil { if len(monitor.Command) > 0 {
cmd = exec.Command(monitor.Command.Command[0], monitor.Command.Command[1:]...) cmd = exec.Command(monitor.Command[0], monitor.Command[1:]...)
} else if monitor.ShellCommand != "" {
cmd = ShellCommand(monitor.ShellCommand)
} else { } else {
cmd = ShellCommand(monitor.Command.ShellCommand) slog.Fatalf("Monitor %s has no command configured", monitor.Name)
} }
checkStartTime := time.Now()
output, err := cmd.CombinedOutput() output, err := cmd.CombinedOutput()
monitor.lastCheck = time.Now() monitor.lastCheck = time.Now()
monitor.lastOutput = string(output) monitor.lastOutput = string(output)
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
var alertNotice *AlertNotice var alertNotice *AlertNotice
isSuccess := (err == nil) isSuccess := (err == nil)
if isSuccess { if isSuccess {
alertNotice = monitor.success() alertNotice = monitor.Success()
} else { } else {
alertNotice = monitor.failure() alertNotice = monitor.Failure()
} }
if LogDebug { slog.Debugf("Command output: %s", monitor.lastOutput)
log.Printf("DEBUG: Command output: %s", monitor.lastOutput) slog.OnErrWarnf(err, "Command result: %v", err)
}
if err != nil {
if LogDebug {
log.Printf("DEBUG: Command result: %v", err)
}
}
log.Printf( slog.Infof(
"INFO: %s success=%t, alert=%t", "%s success=%t, alert=%t",
monitor.Name, monitor.Name,
isSuccess, isSuccess,
alertNotice != nil, alertNotice != nil,
@@ -85,89 +167,89 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
return isSuccess, alertNotice return isSuccess, alertNotice
} }
func (monitor Monitor) isUp() bool { // GetAlertNames gives a list of alert names for a given monitor status
return monitor.alertCount == 0 func (monitor Monitor) GetAlertNames(up bool) []string {
if up {
return monitor.AlertUp
} }
func (monitor *Monitor) success() (notice *AlertNotice) { return monitor.AlertDown
if !monitor.isUp() { }
// IsUp reports whether the monitor is currently considered up, which is the
// case while its AlertCount is zero.
func (monitor Monitor) IsUp() bool {
return monitor.AlertCount == 0
}
// LastCheckMilliseconds returns the number of milliseconds the last check ran for.
func (monitor Monitor) LastCheckMilliseconds() int64 {
return monitor.lastCheckDuration.Milliseconds()
}
func (monitor *Monitor) Success() (notice *AlertNotice) {
if !monitor.IsUp() {
// Alert that we have recovered // Alert that we have recovered
notice = monitor.createAlertNotice(true) notice = monitor.createAlertNotice(true)
} }
monitor.failureCount = 0 monitor.failureCount = 0
monitor.alertCount = 0 monitor.AlertCount = 0
monitor.lastSuccess = time.Now() monitor.lastSuccess = time.Now()
return return
} }
func (monitor *Monitor) failure() (notice *AlertNotice) { func (monitor *Monitor) Failure() (notice *AlertNotice) {
monitor.failureCount++ monitor.failureCount++
// If we haven't hit the minimum failures, we can exit // If we haven't hit the minimum failures, we can exit
if monitor.failureCount < monitor.getAlertAfter() { if monitor.failureCount < monitor.AlertAfter {
if LogDebug { slog.Debugf(
log.Printf( "%s failed but did not hit minimum failures. "+
"DEBUG: %s failed but did not hit minimum failures. "+
"Count: %v alert after: %v", "Count: %v alert after: %v",
monitor.Name, monitor.Name,
monitor.failureCount, monitor.failureCount,
monitor.getAlertAfter(), monitor.AlertAfter,
) )
}
return return
} }
// Take number of failures after minimum // Take number of failures after minimum
failureCount := (monitor.failureCount - monitor.getAlertAfter()) failureCount := (monitor.failureCount - monitor.AlertAfter)
// Use alert cadence to determine if we should alert // Use alert cadence to determine if we should alert
if monitor.AlertEvery > 0 { switch {
// Handle integer number of failures before alerting case monitor.AlertEvery == nil, *monitor.AlertEvery == 0:
if failureCount%monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
} else if monitor.AlertEvery == 0 {
// Handle alerting on first failure only // Handle alerting on first failure only
if failureCount == 0 { if failureCount == 0 {
notice = monitor.createAlertNotice(false) notice = monitor.createAlertNotice(false)
} }
} else { case *monitor.AlertEvery > 0:
// Handle integer number of failures before alerting
if failureCount%*monitor.AlertEvery == 0 {
notice = monitor.createAlertNotice(false)
}
default:
// Handle negative numbers indicating an exponential backoff // Handle negative numbers indicating an exponential backoff
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { if failureCount >= int(math.Pow(2, float64(monitor.AlertCount))-1) { //nolint:mnd
notice = monitor.createAlertNotice(false) notice = monitor.createAlertNotice(false)
} }
} }
// If we're going to alert, increment count // If we're going to alert, increment count
if notice != nil { if notice != nil {
monitor.alertCount++ monitor.AlertCount++
notice.AlertCount = monitor.AlertCount
} }
return return notice
}
func (monitor Monitor) getAlertAfter() int16 {
// TODO: Come up with a better way than this method
// Zero is one!
if monitor.AlertAfter == 0 {
return 1
}
return monitor.AlertAfter
}
// GetAlertNames gives a list of alert names for a given monitor status
func (monitor Monitor) GetAlertNames(up bool) []string {
if up {
return monitor.AlertUp
}
return monitor.AlertDown
} }
func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice { func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
// TODO: Maybe add something about recovery status here // TODO: Maybe add something about recovery status here
return &AlertNotice{ return &AlertNotice{
MonitorName: monitor.Name, MonitorName: monitor.Name,
AlertCount: monitor.alertCount, AlertCount: monitor.AlertCount,
FailureCount: monitor.failureCount, FailureCount: monitor.failureCount,
LastCheckOutput: monitor.lastOutput, LastCheckOutput: monitor.lastOutput,
LastSuccess: monitor.lastSuccess, LastSuccess: monitor.lastSuccess,
+159 -129
View File
@@ -1,159 +1,196 @@
package main package main_test
import ( import (
"log" "errors"
"reflect"
"testing" "testing"
"time" "time"
m "git.iamthefij.com/iamthefij/minitor-go/v2"
) )
// TestMonitorIsValid tests the Monitor.IsValid() func TestMonitorValidate(t *testing.T) {
func TestMonitorIsValid(t *testing.T) { t.Parallel()
cases := []struct { cases := []struct {
monitor Monitor monitor m.Monitor
expected bool expected error
name string name string
}{ }{
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}, AlertDown: []string{"log"}}, true, "Command only"}, {m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, nil, "Command only"},
{Monitor{Command: CommandOrShell{ShellCommand: "echo test"}, AlertDown: []string{"log"}}, true, "CommandShell only"}, {m.Monitor{AlertAfter: 1, ShellCommand: "echo test", AlertDown: []string{"log"}}, nil, "CommandShell only"},
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}}, false, "No AlertDown"}, {m.Monitor{AlertAfter: 1, Command: []string{"echo", "test"}}, m.ErrInvalidMonitor, "No AlertDown"},
{Monitor{AlertDown: []string{"log"}}, false, "No commands"}, {m.Monitor{AlertAfter: 1, AlertDown: []string{"log"}}, m.ErrInvalidMonitor, "No commands"},
{Monitor{Command: CommandOrShell{Command: []string{"echo", "test"}}, AlertDown: []string{"log"}, AlertAfter: -1}, false, "Invalid alert threshold, -1"}, {m.Monitor{AlertAfter: -1, Command: []string{"echo", "test"}, AlertDown: []string{"log"}}, m.ErrInvalidMonitor, "Invalid alert threshold, -1"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
actual := c.monitor.IsValid()
if actual != c.expected { t.Run(c.name, func(t *testing.T) {
t.Parallel()
actual := c.monitor.Validate()
hasErr := (actual != nil)
expectErr := (c.expected != nil)
if hasErr != expectErr || !errors.Is(actual, c.expected) {
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual) t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----") })
} }
} }
// TestMonitorShouldCheck tests the Monitor.ShouldCheck() // TestMonitorShouldCheck tests the Monitor.ShouldCheck()
func TestMonitorShouldCheck(t *testing.T) { func TestMonitorShouldCheck(t *testing.T) {
timeNow := time.Now() t.Parallel()
timeTenSecAgo := time.Now().Add(time.Second * -10)
timeTwentySecAgo := time.Now().Add(time.Second * -20)
cases := []struct { // Create a monitor that should check every second and then verify it checks with some sleeps
monitor Monitor monitor := m.Monitor{ShellCommand: "true", CheckInterval: time.Second}
expected bool
name string if !monitor.ShouldCheck() {
}{ t.Errorf("New monitor should be ready to check")
{Monitor{}, true, "Empty"},
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
} }
for _, c := range cases { monitor.Check()
actual := c.monitor.ShouldCheck()
if actual != c.expected { if monitor.ShouldCheck() {
t.Errorf("ShouldCheck(%v), expected=%t actual=%t", c.name, c.expected, actual) t.Errorf("Monitor should not be ready to check after a check")
} }
time.Sleep(time.Second)
if !monitor.ShouldCheck() {
t.Errorf("Monitor should be ready to check after a second")
} }
} }
// TestMonitorIsUp tests the Monitor.isUp() // TestMonitorIsUp tests the Monitor.IsUp()
func TestMonitorIsUp(t *testing.T) { func TestMonitorIsUp(t *testing.T) {
cases := []struct { t.Parallel()
monitor Monitor
expected bool // Creating a monitor that should alert after 2 failures. The monitor should be considered up until we reach two failed checks
name string monitor := m.Monitor{ShellCommand: "false", AlertAfter: 2}
}{ if !monitor.IsUp() {
{Monitor{}, true, "Empty"}, t.Errorf("New monitor should be considered up")
{Monitor{alertCount: 1}, false, "Has alert"},
{Monitor{alertCount: -1}, false, "Negative alerts"},
{Monitor{alertCount: 0}, true, "No alerts"},
} }
for _, c := range cases { monitor.Check()
log.Printf("Testing case %s", c.name)
actual := c.monitor.isUp() if !monitor.IsUp() {
if actual != c.expected { t.Errorf("Monitor should be considered up with one failure and no alerts")
t.Errorf("isUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----")
monitor.Check()
if monitor.IsUp() {
t.Errorf("Monitor should be considered down with one alert")
} }
} }
// TestMonitorGetAlertNames tests that proper alert names are returned // TestMonitorGetAlertNames tests that proper alert names are returned
func TestMonitorGetAlertNames(t *testing.T) { func TestMonitorGetAlertNames(t *testing.T) {
cases := []struct { cases := []struct {
monitor Monitor monitor m.Monitor
up bool up bool
expected []string expected []string
name string name string
}{ }{
{Monitor{}, true, nil, "Empty up"}, {m.Monitor{}, true, nil, "Empty up"},
{Monitor{}, false, nil, "Empty down"}, {m.Monitor{}, false, nil, "Empty down"},
{Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"}, {m.Monitor{AlertUp: []string{"alert"}}, true, []string{"alert"}, "Return up"},
{Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"}, {m.Monitor{AlertDown: []string{"alert"}}, false, []string{"alert"}, "Return down"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
actual := c.monitor.GetAlertNames(c.up) actual := c.monitor.GetAlertNames(c.up)
if !EqualSliceString(actual, c.expected) { if !reflect.DeepEqual(actual, c.expected) {
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual) t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----") })
} }
} }
// TestMonitorSuccess tests the Monitor.success() func TestMonitorAlertCount(t *testing.T) {
func TestMonitorSuccess(t *testing.T) { alertEvery := 1
cases := []struct { cases := []struct {
monitor Monitor checkSuccess bool
expectNotice bool alertCount int
name string name string
}{ }{
{Monitor{}, false, "Empty"}, {false, 1, "First failure and first alert"},
{Monitor{alertCount: 0}, false, "No alerts"}, {false, 2, "Second failure and first alert"},
{Monitor{alertCount: 1}, true, "Has alert"}, {true, 2, "Success should preserve past alert count"},
{false, 1, "First failure and first alert after success"},
} }
// Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method
monitor := m.Monitor{AlertAfter: 1, AlertEvery: &alertEvery}
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) t.Logf("Testing case %s", c.name)
notice := c.monitor.success()
hasNotice := (notice != nil) var notice *m.AlertNotice
if hasNotice != c.expectNotice { if c.checkSuccess {
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice) notice = monitor.Success()
log.Printf("Case failed: %s", c.name) } else {
notice = monitor.Failure()
}
if notice == nil {
t.Fatalf("failure(%v) expected notice, got nil", c.name)
}
if notice.AlertCount != c.alertCount {
t.Errorf("failure(%v), expected=%v actual=%v", c.name, c.alertCount, notice.AlertCount)
t.Logf("Case failed: %s", c.name)
} }
log.Println("-----")
} }
} }
// TestMonitorFailureAlertAfter tests that alerts will not trigger until // TestMonitorFailureAlertAfter tests that alerts will not trigger until
// hitting the threshold provided by AlertAfter // hitting the threshold provided by AlertAfter
func TestMonitorFailureAlertAfter(t *testing.T) { func TestMonitorFailureAlertAfter(t *testing.T) {
var alertEveryOne int = 1
cases := []struct { cases := []struct {
monitor Monitor monitor m.Monitor
numChecks int
expectNotice bool expectNotice bool
name string name string
}{ }{
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because and AlertEvery default to 0 {m.Monitor{ShellCommand: "false", AlertAfter: 1}, 1, true, "Empty After 1"}, // Defaults to true because and AlertEvery default to 0
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: first failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 1, true, "Alert after 1: first failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert after 1: second failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryOne}, 2, true, "Alert after 1: second failure"},
{Monitor{failureCount: 0, AlertAfter: 20, AlertEvery: 1}, false, "Alert after 20: first failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 1, false, "Alert after 20: first failure"},
{Monitor{failureCount: 19, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 20th failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 20, true, "Alert after 20: 20th failure"},
{Monitor{failureCount: 20, AlertAfter: 20, AlertEvery: 1}, true, "Alert after 20: 21st failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 20, AlertEvery: &alertEveryOne}, 21, true, "Alert after 20: 21st failure"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
notice := c.monitor.failure()
hasNotice := (notice != nil) t.Run(c.name, func(t *testing.T) {
t.Parallel()
hasNotice := false
for i := 0; i < c.numChecks; i++ {
_, notice := c.monitor.Check()
hasNotice = (notice != nil)
}
if hasNotice != c.expectNotice { if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice) t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----") })
} }
} }
@@ -161,51 +198,42 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
// on the expected intervals // on the expected intervals
func TestMonitorFailureAlertEvery(t *testing.T) { func TestMonitorFailureAlertEvery(t *testing.T) {
cases := []struct { cases := []struct {
monitor Monitor monitor m.Monitor
expectNotice bool expectedNotice []bool
name string name string
}{ }{
/* {m.Monitor{ShellCommand: "false", AlertAfter: 1}, []bool{true}, "No AlertEvery set"}, // Defaults to true because AlertAfter and AlertEvery default to nil
TODO: Actually found a bug in original implementation. There is an inconsistency in the way AlertAfter is treated.
For "First alert only" (ie. AlertEvery=0), it is the number of failures to ignore before alerting, so AlertAfter=1
will ignore the first failure and alert on the second failure
For other intervals (ie. AlertEvery=1), it is essentially indexed on one. Essentially making AlertAfter=1 trigger
on the first failure.
For usability, this should be consistent. Consistent with what though? minitor-py? Or itself? Dun dun duuuunnnnn!
*/
{Monitor{AlertAfter: 1}, true, "Empty"}, // Defaults to true because AlertAfter and AlertEvery default to 0
// Alert first time only, after 1 // Alert first time only, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 0}, true, "Alert first time only after 1: first failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(0)}, []bool{true, false, false}, "Alert first time only after 1"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 0}, false, "Alert first time only after 1: third failure"},
// Alert every time, after 1 // Alert every time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: first failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(1)}, []bool{true, true, true}, "Alert every time after 1"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: second failure"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 1}, true, "Alert every time after 1: third failure"},
// Alert every other time, after 1 // Alert every other time, after 1
{Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: first failure"}, {m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: Ptr(2)}, []bool{true, false, true, false}, "Alert every other time after 1"},
{Monitor{failureCount: 1, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: second failure"},
{Monitor{failureCount: 2, AlertAfter: 1, AlertEvery: 2}, true, "Alert every other time after 1: third failure"},
{Monitor{failureCount: 3, AlertAfter: 1, AlertEvery: 2}, false, "Alert every other time after 1: fourth failure"},
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
notice := c.monitor.failure() t.Run(c.name, func(t *testing.T) {
t.Parallel()
for i, expectNotice := range c.expectedNotice {
_, notice := c.monitor.Check()
hasNotice := (notice != nil) hasNotice := (notice != nil)
if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice) if hasNotice != expectNotice {
log.Printf("Case failed: %s", c.name) t.Errorf("failed %s check %d: expected=%t actual=%t", c.name, i, expectNotice, hasNotice)
} }
log.Println("-----") }
})
} }
} }
// TestMonitorFailureExponential tests that alerts will trigger // TestMonitorFailureExponential tests that alerts will trigger
// with an exponential backoff after repeated failures // with an exponential backoff after repeated failures
func TestMonitorFailureExponential(t *testing.T) { func TestMonitorFailureExponential(t *testing.T) {
var alertEveryExp int = -1
cases := []struct { cases := []struct {
expectNotice bool expectNotice bool
name string name string
@@ -222,17 +250,18 @@ func TestMonitorFailureExponential(t *testing.T) {
// Unlike previous tests, this one requires a static Monitor with repeated // Unlike previous tests, this one requires a static Monitor with repeated
// calls to the failure method // calls to the failure method
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1} monitor := m.Monitor{ShellCommand: "false", AlertAfter: 1, AlertEvery: &alertEveryExp}
for _, c := range cases {
log.Printf("Testing case %s", c.name)
notice := monitor.failure() for _, c := range cases {
t.Run(c.name, func(t *testing.T) {
// NOTE: These tests are not parallel because they rely on the state of the Monitor
_, notice := monitor.Check()
hasNotice := (notice != nil) hasNotice := (notice != nil)
if hasNotice != c.expectNotice { if hasNotice != c.expectNotice {
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice) t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----") })
} }
} }
@@ -243,53 +272,54 @@ func TestMonitorCheck(t *testing.T) {
hasNotice bool hasNotice bool
lastOutput string lastOutput string
} }
cases := []struct { cases := []struct {
monitor Monitor monitor m.Monitor
expect expected expect expected
name string name string
}{ }{
{ {
Monitor{Command: CommandOrShell{Command: []string{"echo", "success"}}}, m.Monitor{AlertAfter: 1, Command: []string{"echo", "success"}},
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"}, expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
"Test successful command", "Test successful command",
}, },
{ {
Monitor{Command: CommandOrShell{ShellCommand: "echo success"}}, m.Monitor{AlertAfter: 1, ShellCommand: "echo success"},
expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"}, expected{isSuccess: true, hasNotice: false, lastOutput: "success\n"},
"Test successful command shell", "Test successful command shell",
}, },
{ {
Monitor{Command: CommandOrShell{Command: []string{"total", "failure"}}}, m.Monitor{AlertAfter: 1, Command: []string{"total", "failure"}},
expected{isSuccess: false, hasNotice: true, lastOutput: ""}, expected{isSuccess: false, hasNotice: true, lastOutput: ""},
"Test failed command", "Test failed command",
}, },
{ {
Monitor{Command: CommandOrShell{ShellCommand: "false"}}, m.Monitor{AlertAfter: 1, ShellCommand: "false"},
expected{isSuccess: false, hasNotice: true, lastOutput: ""}, expected{isSuccess: false, hasNotice: true, lastOutput: ""},
"Test failed command shell", "Test failed command shell",
}, },
} }
for _, c := range cases { for _, c := range cases {
log.Printf("Testing case %s", c.name) c := c
t.Run(c.name, func(t *testing.T) {
t.Parallel()
isSuccess, notice := c.monitor.Check() isSuccess, notice := c.monitor.Check()
if isSuccess != c.expect.isSuccess { if isSuccess != c.expect.isSuccess {
t.Errorf("Check(%v) (success), expected=%t actual=%t", c.name, c.expect.isSuccess, isSuccess) t.Errorf("Check(%v) (success), expected=%t actual=%t", c.name, c.expect.isSuccess, isSuccess)
log.Printf("Case failed: %s", c.name)
} }
hasNotice := (notice != nil) hasNotice := (notice != nil)
if hasNotice != c.expect.hasNotice { if hasNotice != c.expect.hasNotice {
t.Errorf("Check(%v) (notice), expected=%t actual=%t", c.name, c.expect.hasNotice, hasNotice) t.Errorf("Check(%v) (notice), expected=%t actual=%t", c.name, c.expect.hasNotice, hasNotice)
log.Printf("Case failed: %s", c.name)
} }
lastOutput := c.monitor.lastOutput lastOutput := c.monitor.LastOutput()
if lastOutput != c.expect.lastOutput { if lastOutput != c.expect.lastOutput {
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput) t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
log.Printf("Case failed: %s", c.name)
} }
log.Println("-----") })
} }
} }
+3
View File
@@ -0,0 +1,3 @@
{
"$schema": "https://docs.renovatebot.com/renovate-schema.json"
}
+52
View File
@@ -0,0 +1,52 @@
# Sample Minitor configuration: two monitors and the alerts they refer to.
# Global interval between checks; per-monitor values must not be lower.
check_interval = "5s"
# Monitor that curls https://minitor.mon and discards the response body.
monitor "Fake Website" {
command = ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
# Entries name the `alert` blocks defined later in this file.
alert_down = ["log_down", "mailgun_down", "sms_down"]
alert_up = ["log_up", "email_up"]
check_interval = "10s" # Must be at minimum the global `check_interval`
# Alerts do not trigger until this failure threshold is reached.
alert_after = 3
alert_every = -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
}
# Same alerting setup as above, checked at the global interval.
monitor "Real Website" {
command = ["curl", "-s", "-o", "/dev/null", "https://google.com"]
alert_down = ["log_down", "mailgun_down", "sms_down"]
alert_up = ["log_up", "email_up"]
check_interval = "5s"
alert_after = 3
alert_every = -1
}
# Echoes a failure message; {{.MonitorName}} is a template field filled in per monitor.
alert "log_down" {
command = ["echo", "Minitor failure for {{.MonitorName}}"]
}
# Echoes a recovery message for the monitor.
alert "log_up" {
command = ["echo", "Minitor recovery for {{.MonitorName}}"]
}
# NOTE(review): this alert uses `{monitor_name}` while every other alert uses
# `{{.MonitorName}}` -- confirm this placeholder form is still expanded.
alert "email_up" {
command = ["sendmail", "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
}
# Posts a failure email through the Mailgun HTTP API.
# `$${...}` is the HCL escape for a literal `${...}`, leaving MAILGUN_API_KEY
# for the shell to expand when the command runs.
alert "mailgun_down" {
shell_command = <<-EOF
curl -s -X POST \
-F subject="Alert! {{.MonitorName}} failed" \
-F from="Minitor <minitor@minitor.mon>" \
-F to=me@minitor.mon \
-F text="Our monitor failed" \
https://api.mailgun.net/v3/minitor.mon/messages \
-u "api:$${MAILGUN_API_KEY}"
EOF
}
# Sends a failure SMS through the Twilio HTTP API; the `$${...}` escapes leave
# AVAILABLE_NUMBER, MY_PHONE, ACCOUNT_SID and AUTH_TOKEN for the shell to expand.
alert "sms_down" {
shell_command = <<-EOF
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed" \
-F "From=$${AVAILABLE_NUMBER}" -F "To=$${MY_PHONE}" \
"https://api.twilio.com/2010-04-01/Accounts/$${ACCOUNT_SID}/Messages" \
-u "$${ACCOUNT_SID}:$${AUTH_TOKEN}"
EOF
}
-41
View File
@@ -1,41 +0,0 @@
---
check_interval: 5
monitors:
- name: Fake Website
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
alert_down: [log_down, mailgun_down, sms_down]
alert_up: [log_up, email_up]
check_interval: 10 # Must be at minimum the global `check_interval`
alert_after: 3
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
- name: Real Website
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
alert_down: [log_down, mailgun_down, sms_down]
alert_up: [log_up, email_up]
check_interval: 5
alert_after: 3
alert_every: -1
alerts:
log_down:
command: ["echo", "Minitor failure for {{.MonitorName}}"]
log_up:
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
email_up:
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
mailgun_down:
command: >
curl -s -X POST
-F subject="Alert! {{.MonitorName}} failed"
-F from="Minitor <minitor@minitor.mon>"
-F to=me@minitor.mon
-F text="Our monitor failed"
https://api.mailgun.net/v3/minitor.mon/messages
-u "api:${MAILGUN_API_KEY}"
sms_down:
command: >
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed"
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
+6
View File
@@ -0,0 +1,6 @@
#! /bin/sh
# Used for a basic HTTP health check
# Avoids output from non-errors and will fail if the HTTP response is unsuccessful
#
# Every argument given to this script is forwarded unchanged to curl ("$@"),
# so the URL and any additional curl options are supplied by the caller.
#   --silent       suppresses the progress meter
#   --show-error   still prints a message when curl fails
#   --fail         makes curl exit non-zero on an HTTP error response
#   -o /dev/null   discards the response body
curl --silent --show-error --fail -o /dev/null "$@"
+15 -3
View File
@@ -11,6 +11,7 @@ set -e
# To override, export DOCKER_HOST to a new hostname # To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:=socket}" DOCKER_HOST="${DOCKER_HOST:=socket}"
container_name="$1" container_name="$1"
num_log_lines="$2"
# Curls Docker either using a socket or URL # Curls Docker either using a socket or URL
function curl_docker { function curl_docker {
@@ -31,21 +32,32 @@ function get_container_id {
# Returns container JSON # Returns container JSON
function inspect_container { function inspect_container {
local container_id=$1 local container_id="$1"
curl_docker "containers/$container_id/json" curl_docker "containers/$container_id/json"
} }
# Gets some lines from docker log
function get_logs {
container_id="$1"
num_lines="$2"
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
if [ -z "$container_name" ]; then if [ -z "$container_name" ]; then
echo "Usage: $0 container_name" echo "Usage: $0 container_name [num_log_lines]"
echo "Will exit with the last status code of continer with provided name" echo "Will exit with the last status code of continer with provided name"
exit 1 exit 1
fi fi
container_id=$(get_container_id $container_name) container_id=$(get_container_id "$container_name")
if [ -z "$container_id" ]; then if [ -z "$container_id" ]; then
echo "ERROR: Could not find container with name: $container_name" echo "ERROR: Could not find container with name: $container_name"
exit 1 exit 1
fi fi
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode) exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
if [ -n "$num_log_lines" ]; then
get_logs "$container_id" "$num_log_lines"
fi
exit "$exit_code" exit "$exit_code"
+13 -1
View File
@@ -11,6 +11,7 @@ set -e
# To override, export DOCKER_HOST to a new hostname # To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:=socket}" DOCKER_HOST="${DOCKER_HOST:=socket}"
container_name="$1" container_name="$1"
num_log_lines="$2"
# Curls Docker either using a socket or URL # Curls Docker either using a socket or URL
function curl_docker { function curl_docker {
@@ -35,8 +36,15 @@ function inspect_container {
curl_docker "containers/$container_id/json" curl_docker "containers/$container_id/json"
} }
# Gets some lines from docker log
function get_logs {
container_id="$1"
num_lines="$2"
curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
if [ -z "$container_name" ]; then if [ -z "$container_name" ]; then
echo "Usage: $0 container_name" echo "Usage: $0 container_name [num_log_lines]"
echo "Will return results of healthcheck for continer with provided name" echo "Will return results of healthcheck for continer with provided name"
exit 1 exit 1
fi fi
@@ -48,6 +56,10 @@ if [ -z "$container_id" ]; then
fi fi
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status') health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
if [ -n "$num_log_lines" ]; then
get_logs "$container_id" "$num_log_lines"
fi
case "$health" in case "$health" in
null) null)
echo "No healthcheck results" echo "No healthcheck results"
+1
View File
@@ -0,0 +1 @@
check_interval = "woops, I'm not an int!"
+7
View File
@@ -0,0 +1,7 @@
check_interval = "1s"
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = [ "alert_down", "log_shell", "log_command" ]
alert_every = 0
}
-8
View File
@@ -1,8 +0,0 @@
check_interval: 1
monitors:
- name: Command
command: ['echo', '$PATH']
alert_down: [ 'alert_down', 'log_shell', 'log_command' ]
# alert_every: -1
alert_every: 0
-1
View File
@@ -1 +0,0 @@
check_interval: woops, I'm not an int!
+12
View File
@@ -0,0 +1,12 @@
check_interval = "1s"
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = ["not_log"]
alert_every = 0
}
alert "log" {
command = ["true"]
}
-13
View File
@@ -1,13 +0,0 @@
check_interval: 1
monitors:
- name: Command
command: ['echo', '$PATH']
alert_down: [ 'not_log']
# alert_every: -1
alert_every: 0
alerts:
log:
command: ['true']
+12
View File
@@ -0,0 +1,12 @@
check_interval = "1s"
alert "log_command" {
command = "should be a list"
}
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = ["log_command"]
alert_every = 2
check_interval = "10s"
}
+16
View File
@@ -0,0 +1,16 @@
check_interval = "1s"
default_alert_down = ["log_command"]
default_alert_every = 0
default_alert_after = 2
monitor "Default" {
command = ["echo"]
}
monitor "Command" {
command = ["echo", "$PATH"]
}
alert "log_command" {
command = ["echo", "default", "'command!!!'", "{{.MonitorName}}"]
}
+34
View File
@@ -0,0 +1,34 @@
check_interval = "1s"
alert "log_command" {
command = ["echo", "regular", "'command!!!'", "{{.MonitorName}}"]
}
alert "log_shell" {
shell_command = "echo \"Failure on {{.MonitorName}} User is $USER\""
}
monitor "Default" {
command = ["echo"]
alert_down = ["log_command"]
}
monitor "Command" {
command = ["echo", "$PATH"]
alert_down = ["log_command", "log_shell"]
alert_every = 2
check_interval = "10s"
}
monitor "Shell" {
shell_command = <<-EOF
echo 'Some string with stuff'
echo 'another line'
echo $PATH
exit 1
EOF
alert_down = ["log_command", "log_shell"]
alert_after = 5
alert_every = 0
check_interval = "1m"
}
-23
View File
@@ -1,23 +0,0 @@
---
check_interval: 1
monitors:
- name: Command
command: ['echo', '$PATH']
alert_down: ['log_command', 'log_shell']
alert_every: 0
- name: Shell
command: >
echo 'Some string with stuff';
echo 'another line';
echo $PATH;
exit 1
alert_down: ['log_command', 'log_shell']
alert_after: 5
alert_every: 0
alerts:
log_command:
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
log_shell:
command: echo "Failure on {{.MonitorName}} User is $USER"
-8
View File
@@ -1,8 +0,0 @@
---
check_interval: 1
monitors:
- name: Command
command: ['echo', '$PATH']
alert_down: ['log']
alert_every: 0
+19
View File
@@ -0,0 +1,19 @@
check_interval = "1s"
monitor "Shell" {
shell_command = <<-EOF
echo 'Some string with stuff'
echo "<angle brackets>"
exit 1
EOF
alert_down = ["log_shell"]
alert_after = 1
alert_every = 0
}
alert "log_shell" {
shell_command = <<EOF
echo 'Some string with stuff'
echo '<angle brackets>'
EOF
}
-18
View File
@@ -1,18 +0,0 @@
---
check_interval: 1
monitors:
- name: Shell
command: >
echo 'Some string with stuff';
echo "<angle brackets>";
exit 1
alert_down: ['log_shell']
alert_after: 1
alert_every: 0
alerts:
log_shell:
command: |
echo 'Some string with stuff'
echo '<angle brackets>'
+3 -1
View File
@@ -8,7 +8,7 @@ import (
// ShellCommand takes a string and executes it as a command using `sh` // ShellCommand takes a string and executes it as a command using `sh`
func ShellCommand(command string) *exec.Cmd { func ShellCommand(command string) *exec.Cmd {
shellCommand := []string{"sh", "-c", strings.TrimSpace(command)} shellCommand := []string{"sh", "-c", strings.TrimSpace(command)}
//log.Printf("Shell command: %v", shellCommand)
return exec.Command(shellCommand[0], shellCommand[1:]...) return exec.Command(shellCommand[0], shellCommand[1:]...)
} }
@@ -17,10 +17,12 @@ func EqualSliceString(a, b []string) bool {
if len(a) != len(b) { if len(a) != len(b) {
return false return false
} }
for i, val := range a { for i, val := range a {
if val != b[i] { if val != b[i] {
return false return false
} }
} }
return true return true
} }
+10 -1
View File
@@ -1,6 +1,9 @@
package main package main
import "testing" import (
"fmt"
"testing"
)
func TestUtilEqualSliceString(t *testing.T) { func TestUtilEqualSliceString(t *testing.T) {
cases := []struct { cases := []struct {
@@ -21,6 +24,11 @@ func TestUtilEqualSliceString(t *testing.T) {
} }
for _, c := range cases { for _, c := range cases {
c := c
t.Run(fmt.Sprintf("%v %v", c.a, c.b), func(t *testing.T) {
t.Parallel()
actual := EqualSliceString(c.a, c.b) actual := EqualSliceString(c.a, c.b)
if actual != c.expected { if actual != c.expected {
t.Errorf( t.Errorf(
@@ -28,5 +36,6 @@ func TestUtilEqualSliceString(t *testing.T) {
c.a, c.b, c.expected, actual, c.a, c.b, c.expected, actual,
) )
} }
})
} }
} }