Compare commits
34 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
60cfac948b | ||
|
|
958446050f | ||
|
|
88e94642d9 | ||
|
|
bc83a51907 | ||
|
|
08b8932331 | ||
|
|
9072d97bb8 | ||
|
|
cdd8a69669 | ||
|
|
3c14a02770 | ||
|
|
328ea83c25 | ||
|
|
ce986e8d1d | ||
|
|
31a4b484bf | ||
|
|
49e3635819 | ||
|
|
444d060736 | ||
|
|
860c2cdf43 | ||
|
|
befea7375f | ||
|
|
04395fa693 | ||
|
|
bdf7355fa7 | ||
|
|
30c2c7d6b2 | ||
|
|
5f250f17a8 | ||
|
|
fda9e1bfc3 | ||
|
|
f0e179851f | ||
|
|
9e124803da | ||
|
|
2c4543a7bc | ||
|
|
a1b906b94a | ||
|
|
0a5be250b5 | ||
|
|
88f77aa27c | ||
|
|
67c2375bba | ||
|
|
aad9eaa32f | ||
|
|
5dc5ba5257 | ||
|
|
4aff294739 | ||
|
|
0684b15a44 | ||
|
|
d3826dacde | ||
|
|
f8e40c643c | ||
|
|
cffbbd734a |
+48
-26
@@ -3,31 +3,15 @@ kind: pipeline
|
||||
name: test
|
||||
|
||||
steps:
|
||||
|
||||
- name: test
|
||||
image: golang:1.12
|
||||
image: golang:1.17
|
||||
environment:
|
||||
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||
commands:
|
||||
- make build
|
||||
- make test
|
||||
|
||||
- name: check
|
||||
image: python:3
|
||||
commands:
|
||||
- pip install pre-commit==1.20.0
|
||||
- make check
|
||||
|
||||
- name: notify
|
||||
image: drillster/drone-email
|
||||
settings:
|
||||
host:
|
||||
from_secret: SMTP_HOST
|
||||
username:
|
||||
from_secret: SMTP_USER
|
||||
password:
|
||||
from_secret: SMTP_PASS
|
||||
from: drone@iamthefij.com
|
||||
when:
|
||||
status: [changed, failure]
|
||||
image: iamthefij/drone-pre-commit:personal
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
@@ -46,10 +30,36 @@ trigger:
|
||||
|
||||
steps:
|
||||
- name: build all binaries
|
||||
image: golang:1.12
|
||||
image: golang:1.17
|
||||
environment:
|
||||
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
|
||||
commands:
|
||||
- make all
|
||||
|
||||
- name: compress binaries for release
|
||||
image: ubuntu
|
||||
commands:
|
||||
- find ./dist -type f -executable -execdir tar -czvf {}.tar.gz {} \;
|
||||
when:
|
||||
event: tag
|
||||
|
||||
- name: upload gitea release
|
||||
image: plugins/gitea-release
|
||||
settings:
|
||||
title: ${DRONE_TAG}
|
||||
files: dist/*.tar.gz
|
||||
checksum:
|
||||
- md5
|
||||
- sha1
|
||||
- sha256
|
||||
- sha512
|
||||
base_url:
|
||||
from_secret: gitea_base_url
|
||||
api_key:
|
||||
from_secret: gitea_token
|
||||
when:
|
||||
event: tag
|
||||
|
||||
- name: push image - arm
|
||||
image: plugins/docker
|
||||
settings:
|
||||
@@ -100,15 +110,27 @@ steps:
|
||||
password:
|
||||
from_secret: docker_password
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: notify
|
||||
|
||||
depends_on:
|
||||
- test
|
||||
- publish
|
||||
|
||||
trigger:
|
||||
status:
|
||||
- failure
|
||||
|
||||
steps:
|
||||
|
||||
- name: notify
|
||||
image: drillster/drone-email
|
||||
settings:
|
||||
host:
|
||||
from_secret: SMTP_HOST
|
||||
from_secret: SMTP_HOST # pragma: whitelist secret
|
||||
username:
|
||||
from_secret: SMTP_USER
|
||||
from_secret: SMTP_USER # pragma: whitelist secret
|
||||
password:
|
||||
from_secret: SMTP_PASS
|
||||
from_secret: SMTP_PASS # pragma: whitelist secret
|
||||
from: drone@iamthefij.com
|
||||
when:
|
||||
status: [changed, failure]
|
||||
|
||||
Vendored
+2
-2
@@ -17,5 +17,5 @@ config.yml
|
||||
|
||||
# Output binary
|
||||
minitor
|
||||
minitor-linux-*
|
||||
minitor-darwin-amd64
|
||||
minitor-go
|
||||
dist/
|
||||
|
||||
@@ -0,0 +1,37 @@
|
||||
---
|
||||
linters:
|
||||
enable:
|
||||
- errname
|
||||
- errorlint
|
||||
- exhaustive
|
||||
- gofumpt
|
||||
- goimports
|
||||
- gomnd
|
||||
- goprintffuncname
|
||||
- misspell
|
||||
- promlinter
|
||||
- tagliatelle
|
||||
- tenv
|
||||
- testpackage
|
||||
- thelper
|
||||
- tparallel
|
||||
- unconvert
|
||||
- wrapcheck
|
||||
- wsl
|
||||
disable:
|
||||
- gochecknoglobals
|
||||
|
||||
linters-settings:
|
||||
gosec:
|
||||
excludes:
|
||||
- G204
|
||||
tagliatelle:
|
||||
case:
|
||||
rules:
|
||||
yaml: snake
|
||||
|
||||
issues:
|
||||
exclude-rules:
|
||||
- path: _test\.go
|
||||
linters:
|
||||
- gosec
|
||||
@@ -1,7 +1,7 @@
|
||||
---
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v2.4.0
|
||||
rev: v4.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-yaml
|
||||
@@ -10,10 +10,11 @@ repos:
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-merge-conflict
|
||||
- repo: git://github.com/dnephin/pre-commit-golang
|
||||
rev: v0.3.5
|
||||
- repo: https://github.com/golangci/golangci-lint
|
||||
rev: v1.50.1
|
||||
hooks:
|
||||
- id: go-fmt
|
||||
- id: go-imports
|
||||
# - id: gometalinter
|
||||
# - id: golangci-lint
|
||||
- id: golangci-lint
|
||||
- repo: https://github.com/hadolint/hadolint
|
||||
rev: v2.12.1-beta
|
||||
hooks:
|
||||
- id: hadolint
|
||||
|
||||
+3
-7
@@ -1,15 +1,11 @@
|
||||
ARG REPO=library
|
||||
FROM multiarch/qemu-user-static:4.2.0-2 as qemu-user-static
|
||||
FROM ${REPO}/alpine:3.10
|
||||
|
||||
# Copying all qemu files because amd64 doesn't exist and cannot condional copy
|
||||
COPY --from=qemu-user-static /usr/bin/qemu-* /usr/bin/
|
||||
FROM ${REPO}/alpine:3.12
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
@@ -20,7 +16,7 @@ RUN chmod -R 755 /app/scripts
|
||||
|
||||
# Copy minitor in
|
||||
ARG ARCH=amd64
|
||||
COPY ./minitor-linux-${ARCH} ./minitor
|
||||
COPY ./dist/minitor-linux-${ARCH} ./minitor
|
||||
|
||||
# Drop to non-root user
|
||||
USER minitor
|
||||
|
||||
@@ -1,7 +1,5 @@
|
||||
ARG REPO=library
|
||||
FROM golang:1.12-alpine AS builder
|
||||
|
||||
RUN apk add --no-cache git=~2
|
||||
FROM golang:1.17 AS builder
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
@@ -16,7 +14,7 @@ ARG VERSION=dev
|
||||
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
|
||||
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
|
||||
|
||||
FROM ${REPO}/alpine:3.10
|
||||
FROM ${REPO}/alpine:3.12
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
|
||||
@@ -24,7 +22,7 @@ WORKDIR /app/
|
||||
COPY --from=builder /app/minitor .
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.79 jq=~1.6
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
|
||||
@@ -1,36 +1,43 @@
|
||||
DOCKER_TAG ?= minitor-go-${USER}
|
||||
GIT_TAG_NAME := $(shell git tag -l --contains HEAD)
|
||||
GIT_SHA := $(shell git rev-parse HEAD)
|
||||
VERSION := $(if $(GIT_TAG_NAME),$(GIT_TAG_NAME),$(GIT_SHA))
|
||||
VERSION ?= $(shell git describe --tags --dirty)
|
||||
GOFILES = *.go
|
||||
# Multi-arch targets are generated from this
|
||||
TARGET_ALIAS = minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64 minitor-darwin-amd64
|
||||
TARGETS = $(addprefix dist/,$(TARGET_ALIAS))
|
||||
#
|
||||
# Default make target will run tests
|
||||
.DEFAULT_GOAL = test
|
||||
|
||||
# Build all static Minitor binaries
|
||||
.PHONY: all
|
||||
all: minitor-linux-amd64 minitor-linux-arm minitor-linux-arm64
|
||||
all: $(TARGETS)
|
||||
|
||||
.PHONY: default
|
||||
default: test
|
||||
# Build all static Linux Minitor binaries. Used in Docker images
|
||||
.PHONY: all-linux
|
||||
all-linux: $(filter dist/minitor-linux-%,$(TARGETS))
|
||||
|
||||
# Build minitor for the current machine
|
||||
minitor: $(GOFILES)
|
||||
@echo Version: $(VERSION)
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
||||
|
||||
.PHONY: build
|
||||
build: minitor
|
||||
|
||||
minitor:
|
||||
@echo Version: $(VERSION)
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -o minitor
|
||||
|
||||
# Run minitor for the current machine
|
||||
.PHONY: run
|
||||
run: minitor build
|
||||
run: minitor
|
||||
./minitor -debug
|
||||
|
||||
.PHONY: run-metrics
|
||||
run-metrics: minitor build
|
||||
run-metrics: minitor
|
||||
./minitor -debug -metrics
|
||||
|
||||
# Run all tests
|
||||
.PHONY: test
|
||||
test:
|
||||
go test -coverprofile=coverage.out
|
||||
@echo
|
||||
go tool cover -func=coverage.out
|
||||
@echo
|
||||
@# Check min coverage percentage
|
||||
@go tool cover -func=coverage.out | awk -v target=80.0% \
|
||||
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
|
||||
|
||||
@@ -39,7 +46,7 @@ test:
|
||||
install-hooks:
|
||||
pre-commit install --install-hooks
|
||||
|
||||
# Checks files for encryption
|
||||
# Runs pre-commit checks on files
|
||||
.PHONY: check
|
||||
check:
|
||||
pre-commit run --all-files
|
||||
@@ -47,9 +54,8 @@ check:
|
||||
.PHONY: clean
|
||||
clean:
|
||||
rm -f ./minitor
|
||||
rm -f ./minitor-linux-*
|
||||
rm -f ./minitor-darwin-amd64
|
||||
rm -f ./coverage.out
|
||||
rm -fr ./dist
|
||||
|
||||
.PHONY: docker-build
|
||||
docker-build:
|
||||
@@ -60,35 +66,23 @@ docker-run: docker-build
|
||||
docker run --rm -v $(shell pwd)/config.yml:/root/config.yml $(DOCKER_TAG)
|
||||
|
||||
## Multi-arch targets
|
||||
|
||||
# Arch specific go build targets
|
||||
minitor-darwin-amd64:
|
||||
GOOS=darwin GOARCH=amd64 CGO_ENABLED=0 \
|
||||
$(TARGETS): $(GOFILES)
|
||||
mkdir -p ./dist
|
||||
GOOS=$(word 2, $(subst -, ,$(@))) GOARCH=$(word 3, $(subst -, ,$(@))) CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-darwin-amd64
|
||||
-o $@
|
||||
|
||||
minitor-linux-amd64:
|
||||
GOOS=linux GOARCH=amd64 CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-amd64
|
||||
|
||||
minitor-linux-arm:
|
||||
GOOS=linux GOARCH=arm CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-arm
|
||||
|
||||
minitor-linux-arm64:
|
||||
GOOS=linux GOARCH=arm64 CGO_ENABLED=0 \
|
||||
go build -ldflags '-X "main.version=${VERSION}"' -a -installsuffix nocgo \
|
||||
-o minitor-linux-arm64
|
||||
.PHONY: $(TARGET_ALIAS)
|
||||
$(TARGET_ALIAS):
|
||||
$(MAKE) $(addprefix dist/,$@)
|
||||
|
||||
# Arch specific docker build targets
|
||||
.PHONY: docker-build-arm
|
||||
docker-build-arm: minitor-linux-arm
|
||||
docker-build-arm: dist/minitor-linux-arm
|
||||
docker build --build-arg REPO=arm32v7 --build-arg ARCH=arm . -t ${DOCKER_TAG}-linux-arm
|
||||
|
||||
.PHONY: docker-build-arm
|
||||
docker-build-arm64: minitor-linux-arm64
|
||||
.PHONY: docker-build-arm64
|
||||
docker-build-arm64: dist/minitor-linux-arm64
|
||||
docker build --build-arg REPO=arm64v8 --build-arg ARCH=arm64 . -t ${DOCKER_TAG}-linux-arm64
|
||||
|
||||
# Cross run on host architecture
|
||||
|
||||
@@ -54,7 +54,7 @@ The global configurations are:
|
||||
|
||||
|key|value|
|
||||
|---|---|
|
||||
|`check_interval`|Maximum frequency to run checks for each monitor|
|
||||
|`check_interval`|Maximum frequency to run checks for each monitor, as a duration string, e.g. `1m2s`.|
|
||||
|`monitors`|List of all monitors. Detailed description below|
|
||||
|`alerts`|List of all alerts. Detailed description below|
|
||||
|
||||
@@ -93,6 +93,7 @@ Also, when alerts are executed, they will be passed through Go's format function
|
||||
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|
||||
|`{{.LastSuccess}}`|The ISO datetime of the last successful check|
|
||||
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|
||||
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
|
||||
|
||||
### Metrics
|
||||
|
||||
@@ -110,7 +111,7 @@ minitor -metrics -metrics-port 3000
|
||||
|
||||
## Contributing
|
||||
|
||||
Whether you're looking to submit a patch or just tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
||||
Whether you're looking to submit a patch or tell me I broke something, you can contribute through the Github mirror and I can merge PRs back to the source repository.
|
||||
|
||||
Primary Repo: https://git.iamthefij.com/iamthefij/minitor.git
|
||||
|
||||
@@ -142,15 +143,25 @@ alerts:
|
||||
command: 'echo {{.MonitorName}}'
|
||||
```
|
||||
|
||||
Interval durations have changed from being an integer number of seconds to a duration string supported by Go, for example:
|
||||
|
||||
minitor-py:
|
||||
```yaml
|
||||
check_interval: 90
|
||||
```
|
||||
|
||||
minitor-go:
|
||||
```yaml
|
||||
check_interval: 1m30s
|
||||
```
|
||||
|
||||
For the time being, legacy configs for the Python version of Minitor should be compatible if you apply the `-py-compat` flag when running Minitor. Eventually, this flag will go away when later breaking changes are introduced.
|
||||
|
||||
## Future
|
||||
|
||||
Future, potentially breaking changes
|
||||
|
||||
- [ ] Implement leveled logging (maybe glog or logrus)
|
||||
- [ ] Consider value of templating vs injecting values into Env variables
|
||||
- [ ] Async checking
|
||||
- [ ] Revisit metrics and see if they all make sense
|
||||
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging (Breaking)
|
||||
- [ ] Use durations rather than seconds checked in event loop (Potentially breaking)
|
||||
|
||||
@@ -2,12 +2,21 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"errors"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"strings"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
var (
|
||||
errNoTemplate = errors.New("no template")
|
||||
|
||||
// ErrAlertFailed indicates that an alert failed to send
|
||||
ErrAlertFailed = errors.New("alert failed")
|
||||
)
|
||||
|
||||
// Alert is a config driven mechanism for sending a notice
|
||||
@@ -20,12 +29,12 @@ type Alert struct {
|
||||
|
||||
// AlertNotice captures the context for an alert to be sent
|
||||
type AlertNotice struct {
|
||||
MonitorName string
|
||||
AlertCount int16
|
||||
FailureCount int16
|
||||
LastCheckOutput string
|
||||
LastSuccess time.Time
|
||||
IsUp bool
|
||||
LastSuccess time.Time
|
||||
MonitorName string
|
||||
LastCheckOutput string
|
||||
}
|
||||
|
||||
// IsValid returns a boolean indicating if the Alert has been correctly
|
||||
@@ -45,29 +54,33 @@ func (alert *Alert) BuildTemplates() error {
|
||||
"{last_success}", "{{.LastSuccess}}",
|
||||
"{monitor_name}", "{{.MonitorName}}",
|
||||
)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Building template for alert %s", alert.Name)
|
||||
}
|
||||
if alert.commandTemplate == nil && alert.Command.Command != nil {
|
||||
|
||||
slog.Debugf("Building template for alert %s", alert.Name)
|
||||
|
||||
switch {
|
||||
case alert.commandTemplate == nil && alert.Command.Command != nil:
|
||||
alert.commandTemplate = []*template.Template{}
|
||||
for i, cmdPart := range alert.Command.Command {
|
||||
if PyCompat {
|
||||
cmdPart = legacy.Replace(cmdPart)
|
||||
}
|
||||
|
||||
alert.commandTemplate = append(alert.commandTemplate, template.Must(
|
||||
template.New(alert.Name+string(i)).Parse(cmdPart),
|
||||
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
|
||||
))
|
||||
}
|
||||
} else if alert.commandShellTemplate == nil && alert.Command.ShellCommand != "" {
|
||||
case alert.commandShellTemplate == nil && alert.Command.ShellCommand != "":
|
||||
shellCmd := alert.Command.ShellCommand
|
||||
|
||||
if PyCompat {
|
||||
shellCmd = legacy.Replace(shellCmd)
|
||||
}
|
||||
|
||||
alert.commandShellTemplate = template.Must(
|
||||
template.New(alert.Name).Parse(shellCmd),
|
||||
)
|
||||
} else {
|
||||
return fmt.Errorf("No template provided for alert %s", alert.Name)
|
||||
default:
|
||||
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -75,30 +88,40 @@ func (alert *Alert) BuildTemplates() error {
|
||||
|
||||
// Send will send an alert notice by executing the command template
|
||||
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
slog.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
|
||||
var cmd *exec.Cmd
|
||||
if alert.commandTemplate != nil {
|
||||
|
||||
switch {
|
||||
case alert.commandTemplate != nil:
|
||||
command := []string{}
|
||||
|
||||
for _, cmdTmp := range alert.commandTemplate {
|
||||
var commandBuffer bytes.Buffer
|
||||
|
||||
err = cmdTmp.Execute(&commandBuffer, notice)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
command = append(command, commandBuffer.String())
|
||||
}
|
||||
|
||||
cmd = exec.Command(command[0], command[1:]...)
|
||||
} else if alert.commandShellTemplate != nil {
|
||||
case alert.commandShellTemplate != nil:
|
||||
var commandBuffer bytes.Buffer
|
||||
|
||||
err = alert.commandShellTemplate.Execute(&commandBuffer, notice)
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
shellCommand := commandBuffer.String()
|
||||
|
||||
cmd = ShellCommand(shellCommand)
|
||||
} else {
|
||||
err = fmt.Errorf("No templates compiled for alert %v", alert.Name)
|
||||
default:
|
||||
err = fmt.Errorf("No templates compiled for alert %s: %w", alert.Name, errNoTemplate)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -110,8 +133,15 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||
var output []byte
|
||||
output, err = cmd.CombinedOutput()
|
||||
outputStr = string(output)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||
slog.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||
|
||||
if err != nil {
|
||||
err = fmt.Errorf(
|
||||
"Alert '%s' failed to send. Returned %v: %w",
|
||||
alert.Name,
|
||||
err,
|
||||
ErrAlertFailed,
|
||||
)
|
||||
}
|
||||
|
||||
return outputStr, err
|
||||
@@ -124,7 +154,7 @@ func NewLogAlert() *Alert {
|
||||
Command: CommandOrShell{
|
||||
Command: []string{
|
||||
"echo",
|
||||
"{{.MonitorName}} check has failed {{.FailureCount}} times",
|
||||
"{{.MonitorName}} {{if .IsUp}}has recovered{{else}}check has failed {{.FailureCount}} times{{end}}",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
+34
-1
@@ -18,11 +18,13 @@ func TestAlertIsValid(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
actual := c.alert.IsValid()
|
||||
if actual != c.expected {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -76,25 +78,52 @@ func TestAlertSend(t *testing.T) {
|
||||
"Command shell with legacy template",
|
||||
true,
|
||||
},
|
||||
// Test default log alert down
|
||||
{
|
||||
*NewLogAlert(),
|
||||
AlertNotice{MonitorName: "Test", FailureCount: 1, IsUp: false},
|
||||
"Test check has failed 1 times\n",
|
||||
false,
|
||||
"Default log alert down",
|
||||
false,
|
||||
},
|
||||
// Test default log alert up
|
||||
{
|
||||
*NewLogAlert(),
|
||||
AlertNotice{MonitorName: "Test", IsUp: true},
|
||||
"Test has recovered\n",
|
||||
false,
|
||||
"Default log alert up",
|
||||
false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
// Set PyCompat to value of compat flag
|
||||
PyCompat = c.pyCompat
|
||||
c.alert.BuildTemplates()
|
||||
|
||||
err := c.alert.BuildTemplates()
|
||||
if err != nil {
|
||||
t.Errorf("Send(%v output), error building templates: %v", c.name, err)
|
||||
}
|
||||
|
||||
output, err := c.alert.Send(c.notice)
|
||||
hasErr := (err != nil)
|
||||
|
||||
if output != c.expectedOutput {
|
||||
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
// Set PyCompat back to default value
|
||||
PyCompat = false
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -102,10 +131,12 @@ func TestAlertSend(t *testing.T) {
|
||||
func TestAlertSendNoTemplates(t *testing.T) {
|
||||
alert := Alert{}
|
||||
notice := AlertNotice{}
|
||||
|
||||
output, err := alert.Send(notice)
|
||||
if err == nil {
|
||||
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
|
||||
@@ -124,10 +155,12 @@ func TestAlertBuildTemplate(t *testing.T) {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
err := c.alert.BuildTemplates()
|
||||
hasErr := (err != nil)
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,14 +3,17 @@ package main
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
"gopkg.in/yaml.v2"
|
||||
)
|
||||
|
||||
var errInvalidConfig = errors.New("Invalid configuration")
|
||||
|
||||
// Config contains all provided user configuration
|
||||
type Config struct {
|
||||
CheckInterval int64 `yaml:"check_interval"`
|
||||
CheckInterval SecondsOrDuration `yaml:"check_interval"`
|
||||
Monitors []*Monitor
|
||||
Alerts map[string]*Alert
|
||||
}
|
||||
@@ -35,17 +38,48 @@ func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) erro
|
||||
// Error indicates this is shell command
|
||||
if err != nil {
|
||||
var shellCmd string
|
||||
|
||||
err := unmarshal(&shellCmd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
cos.ShellCommand = shellCmd
|
||||
} else {
|
||||
cos.Command = cmd
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SecondsOrDuration holds a time.Duration that can be decoded from YAML either
// as an integer number of seconds or as a duration value.
// NOTE: This should be removed in favor of only parsing durations once compatibility is broken
type SecondsOrDuration struct {
	value time.Duration
}

// Value exposes the wrapped duration.
func (sod SecondsOrDuration) Value() time.Duration {
	return sod.value
}

// UnmarshalYAML first tries to decode the node as an int64 count of seconds.
// If that attempt fails, it decodes directly into the wrapped duration and
// returns whatever error that second attempt produced.
func (sod *SecondsOrDuration) UnmarshalYAML(unmarshal func(interface{}) error) error {
	var wholeSeconds int64

	if intErr := unmarshal(&wholeSeconds); intErr != nil {
		// Not an int: let the decoder fill the duration field itself.
		return unmarshal(&sod.value)
	}

	sod.value = time.Duration(wholeSeconds) * time.Second

	return nil
}
|
||||
|
||||
// IsValid checks config validity and returns true if valid
|
||||
func (config Config) IsValid() (isValid bool) {
|
||||
isValid = true
|
||||
@@ -53,41 +87,50 @@ func (config Config) IsValid() (isValid bool) {
|
||||
// Validate alerts
|
||||
if config.Alerts == nil || len(config.Alerts) == 0 {
|
||||
// This should never happen because there is a default alert named 'log' for now
|
||||
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
|
||||
slog.Errorf("Invalid alert configuration: Must provide at least one alert")
|
||||
|
||||
isValid = false
|
||||
}
|
||||
|
||||
for _, alert := range config.Alerts {
|
||||
if !alert.IsValid() {
|
||||
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
|
||||
slog.Errorf("Invalid alert configuration: %+v", alert.Name)
|
||||
|
||||
isValid = false
|
||||
} else {
|
||||
slog.Debugf("Loaded alert %s", alert.Name)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate monitors
|
||||
if config.Monitors == nil || len(config.Monitors) == 0 {
|
||||
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
|
||||
slog.Errorf("Invalid monitor configuration: Must provide at least one monitor")
|
||||
|
||||
isValid = false
|
||||
}
|
||||
|
||||
for _, monitor := range config.Monitors {
|
||||
if !monitor.IsValid() {
|
||||
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
|
||||
slog.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
||||
|
||||
isValid = false
|
||||
}
|
||||
// Check that all Monitor alerts actually exist
|
||||
for _, isUp := range []bool{true, false} {
|
||||
for _, alertName := range monitor.GetAlertNames(isUp) {
|
||||
if _, ok := config.Alerts[alertName]; !ok {
|
||||
log.Printf(
|
||||
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
|
||||
slog.Errorf(
|
||||
"Invalid monitor configuration: %s. Unknown alert %s",
|
||||
monitor.Name, alertName,
|
||||
)
|
||||
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
return isValid
|
||||
}
|
||||
|
||||
// Init performs extra initialization on top of loading the config from file
|
||||
@@ -114,28 +157,30 @@ func LoadConfig(filePath string) (config Config, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Config values:\n%v\n", config)
|
||||
}
|
||||
slog.Debugf("Config values:\n%v\n", config)
|
||||
|
||||
// Add log alert if not present
|
||||
if PyCompat {
|
||||
// Intialize alerts list if not present
|
||||
// Initialize alerts list if not present
|
||||
if config.Alerts == nil {
|
||||
config.Alerts = map[string]*Alert{}
|
||||
}
|
||||
|
||||
if _, ok := config.Alerts["log"]; !ok {
|
||||
config.Alerts["log"] = NewLogAlert()
|
||||
}
|
||||
}
|
||||
|
||||
if !config.IsValid() {
|
||||
err = errors.New("Invalid configuration")
|
||||
return
|
||||
}
|
||||
|
||||
// Finish initializing configuration
|
||||
err = config.Init()
|
||||
if err = config.Init(); err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if !config.IsValid() {
|
||||
err = errInvalidConfig
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
return config, err
|
||||
}
|
||||
|
||||
+41
-1
@@ -3,6 +3,7 @@ package main
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestLoadConfig(t *testing.T) {
|
||||
@@ -27,20 +28,50 @@ func TestLoadConfig(t *testing.T) {
|
||||
PyCompat = c.pyCompat
|
||||
_, err := LoadConfig(c.configPath)
|
||||
hasErr := (err != nil)
|
||||
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
// Set PyCompat to default value
|
||||
PyCompat = false
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
func TestIntervalParsing(t *testing.T) {
|
||||
log.Printf("Testing case TestIntervalParsing")
|
||||
|
||||
config, err := LoadConfig("./test/valid-config.yml")
|
||||
if err != nil {
|
||||
t.Errorf("Failed loading config: %v", err)
|
||||
}
|
||||
|
||||
oneSecond := time.Second
|
||||
tenSeconds := 10 * time.Second
|
||||
oneMinute := time.Minute
|
||||
|
||||
// validate top level interval seconds represented as an int
|
||||
if config.CheckInterval.Value() != oneSecond {
|
||||
t.Errorf("Incorrectly parsed int seconds. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||
}
|
||||
|
||||
if config.Monitors[0].CheckInterval.Value() != tenSeconds {
|
||||
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||
}
|
||||
|
||||
if config.Monitors[1].CheckInterval.Value() != oneMinute {
|
||||
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
|
||||
// TestMultiLineConfig is a more complicated test stepping through the parsing
|
||||
// and execution of multi-line strings presented in YAML
|
||||
func TestMultiLineConfig(t *testing.T) {
|
||||
log.Println("Testing multi-line string config")
|
||||
|
||||
config, err := LoadConfig("./test/valid-verify-multi-line.yml")
|
||||
if err != nil {
|
||||
t.Fatalf("TestMultiLineConfig(load), expected=no_error actual=%v", err)
|
||||
@@ -48,8 +79,10 @@ func TestMultiLineConfig(t *testing.T) {
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(parse > string)")
|
||||
|
||||
expected := "echo 'Some string with stuff'; echo \"<angle brackets>\"; exit 1\n"
|
||||
actual := config.Monitors[0].Command.ShellCommand
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(>) failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
@@ -60,12 +93,15 @@ func TestMultiLineConfig(t *testing.T) {
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(execute > string)")
|
||||
|
||||
_, notice := config.Monitors[0].Check()
|
||||
if notice == nil {
|
||||
t.Fatalf("Did not receive an alert notice")
|
||||
}
|
||||
|
||||
expected = "Some string with stuff\n<angle brackets>\n"
|
||||
actual = notice.LastCheckOutput
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(execute > string) check failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
@@ -76,8 +112,10 @@ func TestMultiLineConfig(t *testing.T) {
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(parse | string)")
|
||||
|
||||
expected = "echo 'Some string with stuff'\necho '<angle brackets>'\n"
|
||||
actual = config.Alerts["log_shell"].Command.ShellCommand
|
||||
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(|) failed")
|
||||
t.Logf("string expected=`%v`", expected)
|
||||
@@ -88,10 +126,12 @@ func TestMultiLineConfig(t *testing.T) {
|
||||
|
||||
log.Println("-----")
|
||||
log.Println("TestMultiLineConfig(execute | string)")
|
||||
|
||||
actual, err = config.Alerts["log_shell"].Send(AlertNotice{})
|
||||
if err != nil {
|
||||
t.Errorf("Execution of alert failed")
|
||||
}
|
||||
|
||||
expected = "Some string with stuff\n<angle brackets>\n"
|
||||
if expected != actual {
|
||||
t.Errorf("TestMultiLineConfig(execute | string) check failed")
|
||||
|
||||
@@ -1,8 +1,9 @@
|
||||
module git.iamthefij.com/iamthefij/minitor-go
|
||||
|
||||
go 1.12
|
||||
go 1.15
|
||||
|
||||
require (
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0
|
||||
github.com/prometheus/client_golang v1.2.1
|
||||
gopkg.in/yaml.v2 v2.2.4
|
||||
)
|
||||
|
||||
@@ -1,3 +1,5 @@
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0 h1:4Hu5PQvDrW5e3FrTS3q2iIXW0iPvhNY/9qJsqDR3K3I=
|
||||
git.iamthefij.com/iamthefij/slog v1.3.0/go.mod h1:1RUj4hcCompZkAxXCRfUX786tb3cM/Zpkn97dGfUfbg=
|
||||
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
|
||||
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
|
||||
|
||||
@@ -1,16 +1,15 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
var (
|
||||
// LogDebug will control whether debug messsages should be logged
|
||||
LogDebug = false
|
||||
|
||||
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
||||
ExportMetrics = false
|
||||
// MetricsPort is the port to expose metrics on
|
||||
@@ -23,58 +22,68 @@ var (
|
||||
|
||||
// version of minitor being run
|
||||
version = "dev"
|
||||
|
||||
errUnknownAlert = errors.New("unknown alert")
|
||||
)
|
||||
|
||||
func checkMonitors(config *Config) error {
|
||||
for _, monitor := range config.Monitors {
|
||||
if monitor.ShouldCheck() {
|
||||
success, alertNotice := monitor.Check()
|
||||
|
||||
hasAlert := alertNotice != nil
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, success)
|
||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
||||
|
||||
// Should probably consider refactoring everything below here
|
||||
if alertNotice != nil {
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
|
||||
}
|
||||
func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) error {
|
||||
slog.Debugf("Received an alert notice from %s", alertNotice.MonitorName)
|
||||
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
||||
|
||||
if alertNames == nil {
|
||||
// This should only happen for a recovery alert. AlertDown is validated not empty
|
||||
log.Printf(
|
||||
"WARNING: Recieved alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||
slog.Warningf(
|
||||
"Received alert, but no alert mechanisms exist. MonitorName=%s IsUp=%t",
|
||||
alertNotice.MonitorName, alertNotice.IsUp,
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
for _, alertName := range alertNames {
|
||||
if alert, ok := config.Alerts[alertName]; ok {
|
||||
output, err := alert.Send(*alertNotice)
|
||||
if err != nil {
|
||||
log.Printf(
|
||||
"ERROR: Alert '%s' failed. result=%v: output=%s",
|
||||
slog.Errorf(
|
||||
"Alert '%s' failed. result=%v: output=%s",
|
||||
alert.Name,
|
||||
err,
|
||||
output,
|
||||
)
|
||||
return fmt.Errorf(
|
||||
"Unsuccessfully triggered alert '%s'. "+
|
||||
"Crashing to avoid false negatives: %v",
|
||||
alert.Name,
|
||||
err,
|
||||
)
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// Count alert metrics
|
||||
Metrics.CountAlert(monitor.Name, alert.Name)
|
||||
} else {
|
||||
// This case should never actually happen since we validate against it
|
||||
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
return fmt.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
slog.Errorf("Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
|
||||
return fmt.Errorf("unknown alert for monitor %s: %s: %w", alertNotice.MonitorName, alertName, errUnknownAlert)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func checkMonitors(config *Config) error {
|
||||
// TODO: Run this in goroutines and capture exceptions
|
||||
for _, monitor := range config.Monitors {
|
||||
if monitor.ShouldCheck() {
|
||||
success, alertNotice := monitor.Check()
|
||||
hasAlert := alertNotice != nil
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, monitor.IsUp())
|
||||
Metrics.CountCheck(monitor.Name, success, monitor.LastCheckSeconds(), hasAlert)
|
||||
|
||||
if alertNotice != nil {
|
||||
err := sendAlerts(config, monitor, alertNotice)
|
||||
// If there was an error in sending an alert, exit early and bubble it up
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -83,41 +92,38 @@ func checkMonitors(config *Config) error {
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Get debug flag
|
||||
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
|
||||
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
||||
|
||||
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
|
||||
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
||||
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
|
||||
flag.IntVar(&MetricsPort, "metrics-port", 8080, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
|
||||
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
var configPath = flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
|
||||
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
|
||||
flag.Parse()
|
||||
|
||||
// Print version if flag is provided
|
||||
if *showVersion {
|
||||
log.Println("Minitor version:", version)
|
||||
fmt.Println("Minitor version:", version)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
// Load configuration
|
||||
config, err := LoadConfig(*configPath)
|
||||
if err != nil {
|
||||
log.Fatalf("Error loading config: %v", err)
|
||||
}
|
||||
slog.OnErrFatalf(err, "Error loading config: %v", err)
|
||||
|
||||
// Serve metrics exporter, if specified
|
||||
if ExportMetrics {
|
||||
log.Println("INFO: Exporting metrics to Prometheus")
|
||||
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
|
||||
|
||||
go ServeMetrics()
|
||||
}
|
||||
|
||||
// Start main loop
|
||||
for {
|
||||
err = checkMonitors(&config)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
slog.OnErrPanicf(err, "Error checking monitors")
|
||||
|
||||
sleepTime := time.Duration(config.CheckInterval) * time.Second
|
||||
time.Sleep(sleepTime)
|
||||
time.Sleep(config.CheckInterval.Value())
|
||||
}
|
||||
}
|
||||
|
||||
+44
-22
@@ -16,7 +16,7 @@ func TestCheckMonitors(t *testing.T) {
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
{
|
||||
Name: "Success",
|
||||
Command: CommandOrShell{Command: []string{"true"}},
|
||||
},
|
||||
@@ -28,36 +28,24 @@ func TestCheckMonitors(t *testing.T) {
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
&Monitor{
|
||||
Name: "Failure",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
AlertDown: []string{"unknown"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
name: "Monitor failure, no and unknown alerts",
|
||||
name: "Monitor failure, no alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
{
|
||||
Name: "Success",
|
||||
Command: CommandOrShell{Command: []string{"ls"}},
|
||||
alertCount: 1,
|
||||
},
|
||||
&Monitor{
|
||||
Name: "Success",
|
||||
Command: CommandOrShell{Command: []string{"true"}},
|
||||
AlertUp: []string{"unknown"},
|
||||
alertCount: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: false,
|
||||
@@ -66,7 +54,35 @@ func TestCheckMonitors(t *testing.T) {
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
AlertDown: []string{"unknown"},
|
||||
AlertAfter: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
name: "Monitor failure, unknown alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
{
|
||||
Name: "Success",
|
||||
Command: CommandOrShell{Command: []string{"true"}},
|
||||
AlertUp: []string{"unknown"},
|
||||
alertCount: 1,
|
||||
},
|
||||
},
|
||||
},
|
||||
expectErr: true,
|
||||
name: "Monitor recovery, unknown alerts",
|
||||
},
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
AlertDown: []string{"good"},
|
||||
@@ -74,7 +90,7 @@ func TestCheckMonitors(t *testing.T) {
|
||||
},
|
||||
},
|
||||
Alerts: map[string]*Alert{
|
||||
"good": &Alert{
|
||||
"good": {
|
||||
Command: CommandOrShell{Command: []string{"true"}},
|
||||
},
|
||||
},
|
||||
@@ -85,7 +101,7 @@ func TestCheckMonitors(t *testing.T) {
|
||||
{
|
||||
config: Config{
|
||||
Monitors: []*Monitor{
|
||||
&Monitor{
|
||||
{
|
||||
Name: "Failure",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
AlertDown: []string{"bad"},
|
||||
@@ -93,7 +109,7 @@ func TestCheckMonitors(t *testing.T) {
|
||||
},
|
||||
},
|
||||
Alerts: map[string]*Alert{
|
||||
"bad": &Alert{
|
||||
"bad": {
|
||||
Name: "bad",
|
||||
Command: CommandOrShell{Command: []string{"false"}},
|
||||
},
|
||||
@@ -105,10 +121,16 @@ func TestCheckMonitors(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
c.config.Init()
|
||||
err := checkMonitors(&c.config)
|
||||
err := c.config.Init()
|
||||
if err != nil {
|
||||
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
|
||||
}
|
||||
|
||||
err = checkMonitors(&c.config)
|
||||
if err == nil && c.expectErr {
|
||||
t.Errorf("checkMonitors(%s): Expected panic, the code did not panic", c.name)
|
||||
} else if err != nil && !c.expectErr {
|
||||
t.Errorf("checkMonitors(%s): Did not expect an error, but we got one anyway: %v", c.name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
+18
-2
@@ -19,6 +19,7 @@ import (
|
||||
type MinitorMetrics struct {
|
||||
// alertCount is a counter vector; NOTE(review): presumably incremented by CountAlert — confirm against its body
alertCount *prometheus.CounterVec
|
||||
// checkCount is incremented by CountCheck with the labels "monitor", "status" and "is_alert"
checkCount *prometheus.CounterVec
|
||||
// checkTime is a gauge labelled by "monitor" and "status"; CountCheck sets it to the seconds value it is given
checkTime *prometheus.GaugeVec
|
||||
// monitorStatus is a gauge labelled by "monitor"; SetMonitorStatus sets it to 1.0 when the monitor is up
monitorStatus *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
@@ -40,9 +41,16 @@ func NewMetrics() *MinitorMetrics {
|
||||
},
|
||||
[]string{"monitor", "status", "is_alert"},
|
||||
),
|
||||
checkTime: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_check_seconds",
|
||||
Help: "Time in miliseconds that a check ran for",
|
||||
},
|
||||
[]string{"monitor", "status"},
|
||||
),
|
||||
monitorStatus: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_monitor_up_count",
|
||||
Name: "minitor_monitor_up",
|
||||
Help: "Status of currently responsive monitors",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
@@ -52,6 +60,7 @@ func NewMetrics() *MinitorMetrics {
|
||||
// Register newly created metrics
|
||||
prometheus.MustRegister(metrics.alertCount)
|
||||
prometheus.MustRegister(metrics.checkCount)
|
||||
prometheus.MustRegister(metrics.checkTime)
|
||||
prometheus.MustRegister(metrics.monitorStatus)
|
||||
|
||||
return metrics
|
||||
@@ -63,11 +72,12 @@ func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
||||
if isUp {
|
||||
val = 1.0
|
||||
}
|
||||
|
||||
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
||||
}
|
||||
|
||||
// CountCheck counts the result of a particular Monitor check
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, secs float64, isAlert bool) {
|
||||
status := "failure"
|
||||
if isSuccess {
|
||||
status = "success"
|
||||
@@ -81,6 +91,10 @@ func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAler
|
||||
metrics.checkCount.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||
).Inc()
|
||||
|
||||
metrics.checkTime.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status},
|
||||
).Set(secs)
|
||||
}
|
||||
|
||||
// CountAlert counts an alert
|
||||
@@ -96,6 +110,8 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
|
||||
// ServeMetrics starts an http server with a Prometheus metrics handler
|
||||
func ServeMetrics() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
host := fmt.Sprintf(":%d", MetricsPort)
|
||||
|
||||
_ = http.ListenAndServe(host, nil)
|
||||
}
|
||||
|
||||
+42
-32
@@ -1,28 +1,31 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
"git.iamthefij.com/iamthefij/slog"
|
||||
)
|
||||
|
||||
// Monitor represents a particular periodic check of a command
|
||||
type Monitor struct {
|
||||
type Monitor struct { //nolint:maligned
|
||||
// Config values
|
||||
Name string
|
||||
Command CommandOrShell
|
||||
AlertDown []string `yaml:"alert_down"`
|
||||
AlertUp []string `yaml:"alert_up"`
|
||||
CheckInterval float64 `yaml:"check_interval"`
|
||||
AlertAfter int16 `yaml:"alert_after"`
|
||||
AlertEvery int16 `yaml:"alert_every"`
|
||||
CheckInterval SecondsOrDuration `yaml:"check_interval"`
|
||||
Name string
|
||||
AlertDown []string `yaml:"alert_down"`
|
||||
AlertUp []string `yaml:"alert_up"`
|
||||
Command CommandOrShell
|
||||
|
||||
// Other values
|
||||
lastCheck time.Time
|
||||
lastOutput string
|
||||
alertCount int16
|
||||
failureCount int16
|
||||
lastCheck time.Time
|
||||
lastSuccess time.Time
|
||||
lastOutput string
|
||||
lastCheckDuration time.Duration
|
||||
}
|
||||
|
||||
// IsValid returns a boolean indicating if the Monitor has been correctly
|
||||
@@ -40,8 +43,9 @@ func (monitor Monitor) ShouldCheck() bool {
|
||||
return true
|
||||
}
|
||||
|
||||
sinceLastCheck := time.Now().Sub(monitor.lastCheck).Seconds()
|
||||
return sinceLastCheck >= monitor.CheckInterval
|
||||
sinceLastCheck := time.Since(monitor.lastCheck)
|
||||
|
||||
return sinceLastCheck >= monitor.CheckInterval.Value()
|
||||
}
|
||||
|
||||
// Check will run the command configured by the Monitor and return a status
|
||||
@@ -54,11 +58,14 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
cmd = ShellCommand(monitor.Command.ShellCommand)
|
||||
}
|
||||
|
||||
checkStartTime := time.Now()
|
||||
output, err := cmd.CombinedOutput()
|
||||
monitor.lastCheck = time.Now()
|
||||
monitor.lastOutput = string(output)
|
||||
monitor.lastCheckDuration = monitor.lastCheck.Sub(checkStartTime)
|
||||
|
||||
var alertNotice *AlertNotice
|
||||
|
||||
isSuccess := (err == nil)
|
||||
if isSuccess {
|
||||
alertNotice = monitor.success()
|
||||
@@ -66,17 +73,11 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
alertNotice = monitor.failure()
|
||||
}
|
||||
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
|
||||
}
|
||||
if err != nil {
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command result: %v", err)
|
||||
}
|
||||
}
|
||||
slog.Debugf("Command output: %s", monitor.lastOutput)
|
||||
slog.OnErrWarnf(err, "Command result: %v", err)
|
||||
|
||||
log.Printf(
|
||||
"INFO: %s success=%t, alert=%t",
|
||||
slog.Infof(
|
||||
"%s success=%t, alert=%t",
|
||||
monitor.Name,
|
||||
isSuccess,
|
||||
alertNotice != nil,
|
||||
@@ -85,15 +86,22 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
return isSuccess, alertNotice
|
||||
}
|
||||
|
||||
func (monitor Monitor) isUp() bool {
|
||||
// IsUp returns the status of the current monitor
|
||||
func (monitor Monitor) IsUp() bool {
|
||||
return monitor.alertCount == 0
|
||||
}
|
||||
|
||||
// LastCheckSeconds gives number of seconds the last check ran for
|
||||
func (monitor Monitor) LastCheckSeconds() float64 {
|
||||
return monitor.lastCheckDuration.Seconds()
|
||||
}
|
||||
|
||||
func (monitor *Monitor) success() (notice *AlertNotice) {
|
||||
if !monitor.isUp() {
|
||||
if !monitor.IsUp() {
|
||||
// Alert that we have recovered
|
||||
notice = monitor.createAlertNotice(true)
|
||||
}
|
||||
|
||||
monitor.failureCount = 0
|
||||
monitor.alertCount = 0
|
||||
monitor.lastSuccess = time.Now()
|
||||
@@ -105,15 +113,14 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
monitor.failureCount++
|
||||
// If we haven't hit the minimum failures, we can exit
|
||||
if monitor.failureCount < monitor.getAlertAfter() {
|
||||
if LogDebug {
|
||||
log.Printf(
|
||||
"DEBUG: %s failed but did not hit minimum failures. "+
|
||||
slog.Debugf(
|
||||
"%s failed but did not hit minimum failures. "+
|
||||
"Count: %v alert after: %v",
|
||||
monitor.Name,
|
||||
monitor.failureCount,
|
||||
monitor.getAlertAfter(),
|
||||
)
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
@@ -121,19 +128,20 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
failureCount := (monitor.failureCount - monitor.getAlertAfter())
|
||||
|
||||
// Use alert cadence to determine if we should alert
|
||||
if monitor.AlertEvery > 0 {
|
||||
switch {
|
||||
case monitor.AlertEvery > 0:
|
||||
// Handle integer number of failures before alerting
|
||||
if failureCount%monitor.AlertEvery == 0 {
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
} else if monitor.AlertEvery == 0 {
|
||||
case monitor.AlertEvery == 0:
|
||||
// Handle alerting on first failure only
|
||||
if failureCount == 0 {
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
} else {
|
||||
default:
|
||||
// Handle negative numbers indicating an exponential backoff
|
||||
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) {
|
||||
if failureCount >= int16(math.Pow(2, float64(monitor.alertCount))-1) { //nolint:gomnd
|
||||
notice = monitor.createAlertNotice(false)
|
||||
}
|
||||
}
|
||||
@@ -143,7 +151,7 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
monitor.alertCount++
|
||||
}
|
||||
|
||||
return
|
||||
return notice
|
||||
}
|
||||
|
||||
func (monitor Monitor) getAlertAfter() int16 {
|
||||
@@ -152,6 +160,7 @@ func (monitor Monitor) getAlertAfter() int16 {
|
||||
if monitor.AlertAfter == 0 {
|
||||
return 1
|
||||
}
|
||||
|
||||
return monitor.AlertAfter
|
||||
}
|
||||
|
||||
@@ -160,6 +169,7 @@ func (monitor Monitor) GetAlertNames(up bool) []string {
|
||||
if up {
|
||||
return monitor.AlertUp
|
||||
}
|
||||
|
||||
return monitor.AlertDown
|
||||
}
|
||||
|
||||
|
||||
+25
-6
@@ -22,11 +22,13 @@ func TestMonitorIsValid(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
actual := c.monitor.IsValid()
|
||||
if actual != c.expected {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -43,9 +45,9 @@ func TestMonitorShouldCheck(t *testing.T) {
|
||||
name string
|
||||
}{
|
||||
{Monitor{}, true, "Empty"},
|
||||
{Monitor{lastCheck: timeNow, CheckInterval: 15}, false, "Just checked"},
|
||||
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: 15}, false, "-10s"},
|
||||
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: 15}, true, "-20s"},
|
||||
{Monitor{lastCheck: timeNow, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "Just checked"},
|
||||
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "-10s"},
|
||||
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, true, "-20s"},
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
@@ -56,7 +58,7 @@ func TestMonitorShouldCheck(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestMonitorIsUp tests the Monitor.isUp()
|
||||
// TestMonitorIsUp tests the Monitor.IsUp()
|
||||
func TestMonitorIsUp(t *testing.T) {
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
@@ -71,11 +73,13 @@ func TestMonitorIsUp(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
actual := c.monitor.isUp()
|
||||
|
||||
actual := c.monitor.IsUp()
|
||||
if actual != c.expected {
|
||||
t.Errorf("isUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
t.Errorf("IsUp(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -96,11 +100,13 @@ func TestMonitorGetAlertNames(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
actual := c.monitor.GetAlertNames(c.up)
|
||||
if !EqualSliceString(actual, c.expected) {
|
||||
t.Errorf("GetAlertNames(%v), expected=%v actual=%v", c.name, c.expected, actual)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -119,12 +125,15 @@ func TestMonitorSuccess(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
notice := c.monitor.success()
|
||||
hasNotice := (notice != nil)
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("success(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -147,12 +156,15 @@ func TestMonitorFailureAlertAfter(t *testing.T) {
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
notice := c.monitor.failure()
|
||||
hasNotice := (notice != nil)
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -195,10 +207,12 @@ func TestMonitorFailureAlertEvery(t *testing.T) {
|
||||
|
||||
notice := c.monitor.failure()
|
||||
hasNotice := (notice != nil)
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -223,15 +237,18 @@ func TestMonitorFailureExponential(t *testing.T) {
|
||||
// Unlike previous tests, this one requires a static Monitor with repeated
|
||||
// calls to the failure method
|
||||
monitor := Monitor{failureCount: 0, AlertAfter: 1, AlertEvery: -1}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Printf("Testing case %s", c.name)
|
||||
|
||||
notice := monitor.failure()
|
||||
hasNotice := (notice != nil)
|
||||
|
||||
if hasNotice != c.expectNotice {
|
||||
t.Errorf("failure(%v), expected=%t actual=%t", c.name, c.expectNotice, hasNotice)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
@@ -243,6 +260,7 @@ func TestMonitorCheck(t *testing.T) {
|
||||
hasNotice bool
|
||||
lastOutput string
|
||||
}
|
||||
|
||||
cases := []struct {
|
||||
monitor Monitor
|
||||
expect expected
|
||||
@@ -290,6 +308,7 @@ func TestMonitorCheck(t *testing.T) {
|
||||
t.Errorf("Check(%v) (output), expected=%v actual=%v", c.name, c.expect.lastOutput, lastOutput)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
+4
-3
@@ -3,14 +3,14 @@ check_interval: 5
|
||||
|
||||
monitors:
|
||||
- name: Fake Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
|
||||
command: ["curl", "-s", "-o", "/dev/null", "https://minitor.mon"]
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 10 # Must be at minimum the global `check_interval`
|
||||
alert_after: 3
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
- name: Real Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
|
||||
command: ["curl", "-s", "-o", "/dev/null", "https://google.com"]
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 5
|
||||
@@ -23,7 +23,8 @@ alerts:
|
||||
log_up:
|
||||
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||
email_up:
|
||||
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||
command:
|
||||
[sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||
mailgun_down:
|
||||
command: >
|
||||
curl -s -X POST
|
||||
|
||||
+15
-3
@@ -11,6 +11,7 @@ set -e
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
num_log_lines="$2"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
@@ -31,21 +32,32 @@ function get_container_id {
|
||||
|
||||
# Returns container JSON
|
||||
function inspect_container {
|
||||
local container_id=$1
|
||||
local container_id="$1"
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
# Gets some lines from docker log
#
# Arguments:
#   $1 - ID of the container whose logs are requested
#   $2 - number of trailing lines to print
# Output: the last $2 lines of the response to the container logs request
#         (made with stdout=1&stderr=1)
function get_logs {
    # Declared local so that calling this function never overwrites the
    # script-level container_id or leaks num_lines into the global scope
    local container_id="$1"
    local num_lines="$2"
    curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Usage: $0 container_name [num_log_lines]"
|
||||
echo "Will exit with the last status code of continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
container_id=$(get_container_id $container_name)
|
||||
container_id=$(get_container_id "$container_name")
|
||||
if [ -z "$container_id" ]; then
|
||||
echo "ERROR: Could not find container with name: $container_name"
|
||||
exit 1
|
||||
fi
|
||||
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
|
||||
|
||||
if [ -n "$num_log_lines" ]; then
|
||||
get_logs "$container_id" "$num_log_lines"
|
||||
fi
|
||||
|
||||
exit "$exit_code"
|
||||
|
||||
@@ -11,6 +11,7 @@ set -e
|
||||
# To override, export DOCKER_HOST to a new hostname
|
||||
DOCKER_HOST="${DOCKER_HOST:=socket}"
|
||||
container_name="$1"
|
||||
num_log_lines="$2"
|
||||
|
||||
# Curls Docker either using a socket or URL
|
||||
function curl_docker {
|
||||
@@ -35,8 +36,15 @@ function inspect_container {
|
||||
curl_docker "containers/$container_id/json"
|
||||
}
|
||||
|
||||
# Gets some lines from docker log
#
# Arguments:
#   $1 - ID of the container whose logs are requested
#   $2 - number of trailing lines to print
# Output: the last $2 lines of the response to the container logs request
#         (made with stdout=1&stderr=1)
function get_logs {
    # Declared local so that calling this function never overwrites the
    # script-level container_id or leaks num_lines into the global scope
    local container_id="$1"
    local num_lines="$2"
    curl_docker "containers/$container_id/logs?stdout=1&stderr=1" | tail -n "$num_lines"
}
|
||||
|
||||
if [ -z "$container_name" ]; then
|
||||
echo "Usage: $0 container_name"
|
||||
echo "Usage: $0 container_name [num_log_lines]"
|
||||
echo "Will return results of healthcheck for continer with provided name"
|
||||
exit 1
|
||||
fi
|
||||
@@ -48,6 +56,10 @@ if [ -z "$container_id" ]; then
|
||||
fi
|
||||
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
|
||||
|
||||
if [ -n "$num_log_lines" ]; then
|
||||
get_logs "$container_id" "$num_log_lines"
|
||||
fi
|
||||
|
||||
case "$health" in
|
||||
null)
|
||||
echo "No healthcheck results"
|
||||
|
||||
@@ -3,21 +3,23 @@ check_interval: 1
|
||||
|
||||
monitors:
|
||||
- name: Command
|
||||
command: ['echo', '$PATH']
|
||||
alert_down: ['log_command', 'log_shell']
|
||||
command: ["echo", "$PATH"]
|
||||
alert_down: ["log_command", "log_shell"]
|
||||
alert_every: 0
|
||||
check_interval: 10s
|
||||
- name: Shell
|
||||
command: >
|
||||
echo 'Some string with stuff';
|
||||
echo 'another line';
|
||||
echo $PATH;
|
||||
exit 1
|
||||
alert_down: ['log_command', 'log_shell']
|
||||
alert_down: ["log_command", "log_shell"]
|
||||
alert_after: 5
|
||||
alert_every: 0
|
||||
check_interval: 1m
|
||||
|
||||
alerts:
|
||||
log_command:
|
||||
command: ['echo', 'regular', '"command!!!"', "{{.MonitorName}}"]
|
||||
command: ["echo", "regular", '"command!!!"', "{{.MonitorName}}"]
|
||||
log_shell:
|
||||
command: echo "Failure on {{.MonitorName}} User is $USER"
|
||||
|
||||
@@ -8,7 +8,7 @@ import (
|
||||
// ShellCommand builds an `sh -c` invocation for the given command string.
// Leading and trailing whitespace is trimmed from the command before it is
// passed to the shell. The returned command is only constructed, not started.
func ShellCommand(command string) *exec.Cmd {
	script := strings.TrimSpace(command)

	return exec.Command("sh", "-c", script)
}
|
||||
|
||||
@@ -17,10 +17,12 @@ func EqualSliceString(a, b []string) bool {
|
||||
if len(a) != len(b) {
|
||||
return false
|
||||
}
|
||||
|
||||
for i, val := range a {
|
||||
if val != b[i] {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user