Compare commits

..

1 Commits

Author SHA1 Message Date
Ian Fijolek
6c7c0a470f WIP: Begin adding prometheus metrics exporting 2019-11-15 11:25:21 -08:00
15 changed files with 51 additions and 405 deletions
+4 -50
View File
@@ -1,59 +1,13 @@
---
kind: pipeline kind: pipeline
name: test name: test
steps: steps:
- name: build
image: golang:1.12
commands:
- make build
- name: test - name: test
image: golang:1.12 image: golang:1.12
commands: commands:
- make build
- make test - make test
- name: check
image: python:3
commands:
- pip install pre-commit==1.20.0
- make check
- name: notify
image: drillster/drone-email
settings:
host:
from_secret: SMTP_HOST
username:
from_secret: SMTP_USER
password:
from_secret: SMTP_PASS
from: drone@iamthefij.com
when:
status: [changed, failure]
---
kind: pipeline
name: publish
depends_on:
- test
trigger:
event:
- push
- tag
refs:
- refs/heads/master
- refs/tags/v*
steps:
# Might consider moving this step into the previous pipeline
- name: push image
image: plugins/docker
settings:
repo: iamthefij/minitor-go
dockerfile: Dockerfile.multi-stage
auto_tag: true
username:
from_secret: docker_username
password:
from_secret: docker_password
-19
View File
@@ -1,19 +0,0 @@
---
# Hooks executed by pre-commit for this repository.
repos:
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v2.4.0
    hooks:
      - id: check-added-large-files
      - id: check-yaml
        args:
          - --allow-multiple-documents
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-merge-conflict
  # Fetched over https: GitHub no longer serves the unauthenticated git:// protocol.
  - repo: https://github.com/dnephin/pre-commit-golang
    rev: v0.3.5
    hooks:
      - id: go-fmt
      - id: go-imports
      # - id: gometalinter
      # - id: golangci-lint
+2 -18
View File
@@ -1,24 +1,8 @@
ARG REPO=library ARG REPO=library
FROM ${REPO}/alpine:3.10 FROM ${REPO}/busybox:latest
RUN mkdir /app WORKDIR /root/
WORKDIR /app/
# Copy minitor in
ARG ARCH=amd64 ARG ARCH=amd64
COPY ./minitor-go ./minitor COPY ./minitor-go ./minitor
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
# Copy scripts
COPY ./scripts /app/scripts
RUN chown -R minitor:minitor /app
RUN chmod -R 755 /app/scripts
# Drop to non-root user
USER minitor
ENTRYPOINT [ "./minitor" ] ENTRYPOINT [ "./minitor" ]
+3 -22
View File
@@ -1,7 +1,7 @@
ARG REPO=library ARG REPO=library
FROM golang:1.12-alpine AS builder FROM golang:1.12-alpine AS builder
RUN apk add --no-cache git=~2 RUN apk add --no-cache git
RUN mkdir /app RUN mkdir /app
WORKDIR /app WORKDIR /app
@@ -16,27 +16,8 @@ ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH} ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor . RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM ${REPO}/alpine:3.10 FROM ${REPO}/busybox:latest
RUN mkdir /app WORKDIR /root/
WORKDIR /app/
# Copy minitor in
COPY --from=builder /app/minitor . COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
# Copy scripts
COPY ./scripts /app/scripts
RUN chown -R minitor:minitor /app
RUN chmod -R 755 /app/scripts
# Drop to non-root user
USER minitor
ENTRYPOINT [ "./minitor" ] ENTRYPOINT [ "./minitor" ]
# vim: set filetype=dockerfile:
+1 -15
View File
@@ -1,7 +1,6 @@
.PHONY: all
DOCKER_TAG ?= minitor-go-${USER} DOCKER_TAG ?= minitor-go-${USER}
.PHONY: default .PHONY: test
default: test default: test
.PHONY: build .PHONY: build
@@ -15,10 +14,6 @@ minitor-go:
run: minitor-go build run: minitor-go build
./minitor-go -debug ./minitor-go -debug
.PHONY: run-metrics
run-metrics: minitor-go build
./minitor-go -debug -metrics
.PHONY: test .PHONY: test
test: test:
go test -coverprofile=coverage.out go test -coverprofile=coverage.out
@@ -29,15 +24,6 @@ test:
@go tool cover -func=coverage.out | awk -v target=80.0% \ @go tool cover -func=coverage.out | awk -v target=80.0% \
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }' '/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
# Installs pre-commit hooks
.PHONY: install-hooks
install-hooks:
pre-commit install --install-hooks
# Checks files for encryption
.PHONY: check
check:
pre-commit run --all-files
.PHONY: clean .PHONY: clean
clean: clean:
+7 -9
View File
@@ -2,7 +2,7 @@
A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
Minitor is already a minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a large footprint. Thus Go feels like a better fit for the project, longer term. Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus Go feels like a better fit for the project, longer term.
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features. Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
@@ -30,7 +30,7 @@ monitors:
command_shell: echo 'test' command_shell: echo 'test'
``` ```
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct, `AlertNotice` defined in `alert.go` and the built in Go templating format. Eg. Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built in Go templating format. Eg.
minitor-py: minitor-py:
```yaml ```yaml
@@ -38,7 +38,7 @@ alerts:
log_command: log_command:
command: ['echo', '{monitor_name}'] command: ['echo', '{monitor_name}']
log_shell: log_shell:
command_shell: 'echo {monitor_name}' command_shell: "echo {monitor_name}"
``` ```
minitor-go: minitor-go:
@@ -47,7 +47,7 @@ alerts:
log_command: log_command:
command: ['echo', '{{.MonitorName}}'] command: ['echo', '{{.MonitorName}}']
log_shell: log_shell:
command_shell: 'echo {{.MonitorName}}' command_shell: "echo {{.MonitorName}}"
``` ```
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used and continuations should not. Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used and continuations should not.
@@ -84,11 +84,10 @@ Pairity:
- [x] Run alert commands - [x] Run alert commands
- [x] Run alert commands in a shell - [x] Run alert commands in a shell
- [x] Allow templating of alert commands - [x] Allow templating of alert commands
- [x] Implement Prometheus client to export metrics - [ ] Implement Prometheus client to export metrics
- [x] Test coverage - [ ] Test coverage
- [ ] Integration testing (manual or otherwise)
Improvement (potentially breaking): Improvement:
- [ ] Implement leveled logging (maybe glog or logrus) - [ ] Implement leveled logging (maybe glog or logrus)
- [ ] Consider switching from YAML to TOML - [ ] Consider switching from YAML to TOML
@@ -96,4 +95,3 @@ Improvement (potentially breaking):
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging - [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging
- [ ] Async checking - [ ] Async checking
- [ ] Use durations rather than seconds checked in event loop - [ ] Use durations rather than seconds checked in event loop
- [ ] Revisit metrics and see if they all make sense
+2 -5
View File
@@ -2,11 +2,10 @@ package main
import ( import (
"errors" "errors"
"gopkg.in/yaml.v2"
"io/ioutil" "io/ioutil"
"log" "log"
"os" "os"
"gopkg.in/yaml.v2"
) )
// Config type contains all provided user configuration // Config type contains all provided user configuration
@@ -85,9 +84,7 @@ func LoadConfig(filePath string) (config Config, err error) {
return return
} }
if LogDebug { log.Printf("config:\n%v\n", config)
log.Printf("DEBUG: Config values:\n%v\n", config)
}
if !config.IsValid() { if !config.IsValid() {
err = errors.New("Invalid configuration") err = errors.New("Invalid configuration")
+9 -15
View File
@@ -3,7 +3,9 @@ package main
import ( import (
"flag" "flag"
"fmt" "fmt"
"github.com/prometheus/client_golang/prometheus/promhttp"
"log" "log"
"net/http"
"time" "time"
) )
@@ -13,10 +15,6 @@ var (
// ExportMetrics will track whether or not we want to export metrics to prometheus // ExportMetrics will track whether or not we want to export metrics to prometheus
ExportMetrics = false ExportMetrics = false
// MetricsPort is the port to expose metrics on
MetricsPort = 8080
// Metrics contains all active metrics
Metrics = NewMetrics()
// version of minitor being run // version of minitor being run
version = "dev" version = "dev"
@@ -25,13 +23,7 @@ var (
func checkMonitors(config *Config) error { func checkMonitors(config *Config) error {
for _, monitor := range config.Monitors { for _, monitor := range config.Monitors {
if monitor.ShouldCheck() { if monitor.ShouldCheck() {
success, alertNotice := monitor.Check() _, alertNotice := monitor.Check()
hasAlert := alertNotice != nil
// Track status metrics
Metrics.SetMonitorStatus(monitor.Name, success)
Metrics.CountCheck(monitor.Name, success, hasAlert)
// Should probably consider refactoring everything below here // Should probably consider refactoring everything below here
if alertNotice != nil { if alertNotice != nil {
@@ -63,9 +55,6 @@ func checkMonitors(config *Config) error {
err, err,
) )
} }
// Count alert metrics
Metrics.CountAlert(monitor.Name, alert.Name)
} else { } else {
// This case should never actually happen since we validate against it // This case should never actually happen since we validate against it
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName) log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
@@ -79,6 +68,11 @@ func checkMonitors(config *Config) error {
return nil return nil
} }
// serveMetrics registers the Prometheus handler at /metrics on the default
// mux and serves it on port 8080. The call blocks while the server is
// running, so it is meant to be launched in its own goroutine.
func serveMetrics() {
	http.Handle("/metrics", promhttp.Handler())
	// ListenAndServe only returns with a non-nil error (for example when the
	// port is already in use); log it so a dead metrics endpoint is visible.
	if err := http.ListenAndServe(":8080", nil); err != nil {
		log.Printf("ERROR: Metrics server stopped: %v", err)
	}
}
func main() { func main() {
// Get debug flag // Get debug flag
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)") flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
@@ -101,7 +95,7 @@ func main() {
// Serve metrics exporter, if specified // Serve metrics exporter, if specified
if ExportMetrics { if ExportMetrics {
log.Println("INFO: Exporting metrics to Prometheus") log.Println("INFO: Exporting metrics to Prometheus")
go ServeMetrics() go serveMetrics()
} }
// Start main loop // Start main loop
-101
View File
@@ -1,101 +0,0 @@
package main
import (
	"fmt"
	"log"
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promhttp"
)
// TODO: Not sure if this is the best way to handle. A global instance for
// metrics isn't bad, but it might be nice to curry versions of the metrics
// for each monitor. Especially since every monitor has its own. Perhaps
// another new function that essentially curries each metric for a given
// monitor name would do. This could be run when validating monitors and
// initializing alert templates.
// MinitorMetrics bundles every Prometheus collector that Minitor updates
// while it runs checks and sends alerts.
type MinitorMetrics struct {
	// Counter vectors: alerts sent and checks performed.
	alertCount, checkCount *prometheus.CounterVec
	// Gauge vector holding the latest up/down value per monitor.
	monitorStatus *prometheus.GaugeVec
}
// NewMetrics builds each metric vector used by Minitor, registers all of
// them through prometheus.MustRegister and returns them wrapped in a
// MinitorMetrics value.
func NewMetrics() *MinitorMetrics {
	alerts := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "minitor_alert_total",
			Help: "Number of Minitor alerts",
		},
		[]string{"alert", "monitor"},
	)
	checks := prometheus.NewCounterVec(
		prometheus.CounterOpts{
			Name: "minitor_check_total",
			Help: "Number of Minitor checks",
		},
		[]string{"monitor", "status", "is_alert"},
	)
	status := prometheus.NewGaugeVec(
		prometheus.GaugeOpts{
			Name: "minitor_monitor_up_count",
			Help: "Status of currently responsive monitors",
		},
		[]string{"monitor"},
	)

	// Collectors are registered in the order given; MustRegister panics if
	// any registration fails.
	prometheus.MustRegister(alerts, checks, status)

	return &MinitorMetrics{
		alertCount:    alerts,
		checkCount:    checks,
		monitorStatus: status,
	}
}
// SetMonitorStatus writes the current state of the named monitor to the
// status gauge: 1 when the monitor is up, 0 otherwise.
func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
	gauge := metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor})
	if isUp {
		gauge.Set(1.0)
		return
	}
	gauge.Set(0.0)
}
// CountCheck increments the check counter for the named monitor, labelled
// with the outcome ("success" or "failure") and whether the check produced
// an alert ("true" or "false").
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
	// Start from the negative outcome and flip labels as needed.
	labels := prometheus.Labels{
		"monitor":  monitor,
		"status":   "failure",
		"is_alert": "false",
	}
	if isSuccess {
		labels["status"] = "success"
	}
	if isAlert {
		labels["is_alert"] = "true"
	}

	metrics.checkCount.With(labels).Inc()
}
// CountAlert increments the alert counter for the given monitor/alert pair.
func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
	labels := prometheus.Labels{"alert": alert, "monitor": monitor}
	metrics.alertCount.With(labels).Inc()
}
// ServeMetrics starts an http server with a Prometheus metrics handler.
//
// The handler is mounted at /metrics on the default mux and served on
// MetricsPort. The call blocks while the server is running.
func ServeMetrics() {
	http.Handle("/metrics", promhttp.Handler())
	addr := fmt.Sprintf(":%d", MetricsPort)
	// ListenAndServe only returns with a non-nil error (for example when the
	// port is already in use); log it so a dead metrics endpoint is visible
	// instead of being silently discarded.
	if err := http.ListenAndServe(addr, nil); err != nil {
		log.Printf("ERROR: Metrics server on %s stopped: %v", addr, err)
	}
}
+9 -21
View File
@@ -1,41 +1,29 @@
--- check_interval: 30
check_interval: 5
monitors: monitors:
- name: Fake Website - name: My Website
command: [ 'curl', '-s', '-o', '/dev/null', 'https://minitor.mon' ] command: [ 'curl', '-s', '-o', '/dev/null', 'https://minitor.mon' ]
alert_down: [log_down, mailgun_down, sms_down] alert_down: [ log, mailgun_down, sms_down ]
alert_up: [log_up, email_up] alert_up: [ log, email_up ]
check_interval: 10 # Must be at minimum the global `check_interval` check_interval: 30 # Must be at minimum the global `check_interval`
alert_after: 3 alert_after: 3
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
- name: Real Website
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
alert_down: [log_down, mailgun_down, sms_down]
alert_up: [log_up, email_up]
check_interval: 5
alert_after: 3
alert_every: -1
alerts: alerts:
log_down:
command: ["echo", "Minitor failure for {{.MonitorName}}"]
log_up:
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
email_up: email_up:
command: [ sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!" ] command: [ sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!" ]
mailgun_down: mailgun_down:
command_shell: > command: >
curl -s -X POST curl -s -X POST
-F subject="Alert! {{.MonitorName}} failed" -F subject="Alert! {monitor_name} failed"
-F from="Minitor <minitor@minitor.mon>" -F from="Minitor <minitor@minitor.mon>"
-F to=me@minitor.mon -F to=me@minitor.mon
-F text="Our monitor failed" -F text="Our monitor failed"
https://api.mailgun.net/v3/minitor.mon/messages https://api.mailgun.net/v3/minitor.mon/messages
-u "api:${MAILGUN_API_KEY}" -u "api:${MAILGUN_API_KEY}"
sms_down: sms_down:
command_shell: > command: >
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed" curl -s -X POST -F "Body=Failure! {monitor_name} has failed"
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}" -F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages" "https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
-u "${ACCOUNT_SID}:${AUTH_TOKEN}" -u "${ACCOUNT_SID}:${AUTH_TOKEN}"
-5
View File
@@ -1,5 +0,0 @@
# Minitor Scripts
A collection of some handy scripts to use with Minitor
These are not included with the Python package, but they are included in the Docker image in `/app/scripts`.
-51
View File
@@ -1,51 +0,0 @@
#! /bin/bash
set -e

#################
# docker_check.sh
#
# Checks the most recent state exit code of a Docker container
#################

# DOCKER_HOST selects how the Docker API is reached. When it is unset or
# empty it falls back to "socket"; export it to a hostname to override.
: "${DOCKER_HOST:=socket}"
container_name="$1"
# Requests an API path from Docker, through the unix socket when DOCKER_HOST
# is "socket" and over HTTP to DOCKER_HOST otherwise. curl's stderr is dropped.
function curl_docker {
    local endpoint="$1"
    case "$DOCKER_HOST" in
        socket)
            curl --unix-socket /var/run/docker.sock "http://localhost/$endpoint" 2>/dev/null
            ;;
        *)
            curl "http://${DOCKER_HOST}/$endpoint" 2>/dev/null
            ;;
    esac
}
# Returns container ID for a given container name
#
# Lists all containers (running or not) and prints the Id of each entry whose
# name equals "/<container_name>".
function get_container_id {
    local container_name="$1"
    # Pass the name to jq as a variable instead of splicing it into the
    # filter text, so quotes or backslashes in the argument cannot break or
    # alter the jq program.
    curl_docker 'containers/json?all=1' \
        | jq -r --arg name "/${container_name}" \
            '.[] | {Id, Name: .Names[]} | select(.Name == $name) | .Id'
}
# Prints the inspect JSON document for the container with the given ID
function inspect_container {
    local id="$1"
    curl_docker "containers/${id}/json"
}
# A container name is required
if [ -z "$container_name" ]; then
    echo "Usage: $0 container_name"
    echo "Will exit with the last status code of container with provided name"
    exit 1
fi

# Quoted so a name containing spaces or glob characters is passed through
# as a single, unmodified argument
container_id=$(get_container_id "$container_name")
if [ -z "$container_id" ]; then
    echo "ERROR: Could not find container with name: $container_name"
    exit 1
fi

# Exit with the exit code Docker recorded in the container's state
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)
exit "$exit_code"
-61
View File
@@ -1,61 +0,0 @@
#! /bin/bash
set -e

#################
# docker_healthcheck.sh
#
# Returns the results of a Docker Healthcheck for a container
#################

# DOCKER_HOST selects how the Docker API is reached. When it is unset or
# empty it falls back to "socket"; export it to a hostname to override.
: "${DOCKER_HOST:=socket}"
container_name="$1"
# Requests an API path from Docker, through the unix socket when DOCKER_HOST
# is "socket" and over HTTP to DOCKER_HOST otherwise. curl's stderr is dropped.
function curl_docker {
    local endpoint="$1"
    case "$DOCKER_HOST" in
        socket)
            curl --unix-socket /var/run/docker.sock "http://localhost/$endpoint" 2>/dev/null
            ;;
        *)
            curl "http://${DOCKER_HOST}/$endpoint" 2>/dev/null
            ;;
    esac
}
# Returns container ID for a given container name
#
# Lists all containers (running or not) and prints the Id of each entry whose
# name equals "/<container_name>".
function get_container_id {
    local container_name="$1"
    # Pass the name to jq as a variable instead of splicing it into the
    # filter text, so quotes or backslashes in the argument cannot break or
    # alter the jq program.
    curl_docker 'containers/json?all=1' \
        | jq -r --arg name "/${container_name}" \
            '.[] | {Id, Name: .Names[]} | select(.Name == $name) | .Id'
}
# Prints the inspect JSON document for the container with the given ID
function inspect_container {
    local id="$1"
    curl_docker "containers/${id}/json"
}
# A container name is required
if [ -z "$container_name" ]; then
    echo "Usage: $0 container_name"
    echo "Will return results of healthcheck for container with provided name"
    exit 1
fi

container_id=$(get_container_id "$container_name")
if [ -z "$container_id" ]; then
    echo "ERROR: Could not find container with name: $container_name"
    exit 1
fi

# Map the reported health status onto an exit code
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')
case "$health" in
    null)
        # jq printed "null": the inspect output carries no health status
        echo "No healthcheck results"
        ;;
    starting|healthy)
        echo "Status: '$health'"
        ;;
    *)
        # Any other status is reported as a failure
        echo "Status: '$health'"
        exit 1
        ;;
esac
+1
View File
@@ -6,3 +6,4 @@ monitors:
alert_down: [ 'alert_down', 'log_shell', 'log_command' ] alert_down: [ 'alert_down', 'log_shell', 'log_command' ]
# alert_every: -1 # alert_every: -1
alert_every: 0 alert_every: 0