Compare commits
1 Commits
logrus
..
prometheus
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6c7c0a470f |
+4
-50
@@ -1,59 +1,13 @@
|
||||
---
|
||||
kind: pipeline
|
||||
name: test
|
||||
|
||||
steps:
|
||||
- name: build
|
||||
image: golang:1.12
|
||||
commands:
|
||||
- make build
|
||||
|
||||
- name: test
|
||||
image: golang:1.12
|
||||
commands:
|
||||
- make build
|
||||
- make test
|
||||
|
||||
- name: check
|
||||
image: python:3
|
||||
commands:
|
||||
- pip install pre-commit==1.20.0
|
||||
- make check
|
||||
|
||||
- name: notify
|
||||
image: drillster/drone-email
|
||||
settings:
|
||||
host:
|
||||
from_secret: SMTP_HOST
|
||||
username:
|
||||
from_secret: SMTP_USER
|
||||
password:
|
||||
from_secret: SMTP_PASS
|
||||
from: drone@iamthefij.com
|
||||
when:
|
||||
status: [changed, failure]
|
||||
|
||||
---
|
||||
kind: pipeline
|
||||
name: publish
|
||||
|
||||
depends_on:
|
||||
- test
|
||||
|
||||
trigger:
|
||||
event:
|
||||
- push
|
||||
- tag
|
||||
refs:
|
||||
- refs/heads/master
|
||||
- refs/tags/v*
|
||||
|
||||
steps:
|
||||
|
||||
# Might consider moving this step into the previous pipeline
|
||||
- name: push image
|
||||
image: plugins/docker
|
||||
settings:
|
||||
repo: iamthefij/minitor-go
|
||||
dockerfile: Dockerfile.multi-stage
|
||||
auto_tag: true
|
||||
username:
|
||||
from_secret: docker_username
|
||||
password:
|
||||
from_secret: docker_password
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
---
|
||||
repos:
|
||||
- repo: https://github.com/pre-commit/pre-commit-hooks
|
||||
rev: v2.4.0
|
||||
hooks:
|
||||
- id: check-added-large-files
|
||||
- id: check-yaml
|
||||
args:
|
||||
- --allow-multiple-documents
|
||||
- id: trailing-whitespace
|
||||
- id: end-of-file-fixer
|
||||
- id: check-merge-conflict
|
||||
- repo: git://github.com/dnephin/pre-commit-golang
|
||||
rev: v0.3.5
|
||||
hooks:
|
||||
- id: go-fmt
|
||||
- id: go-imports
|
||||
# - id: gometalinter
|
||||
# - id: golangci-lint
|
||||
+2
-17
@@ -1,23 +1,8 @@
|
||||
ARG REPO=library
|
||||
FROM ${REPO}/alpine:3.10
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
FROM ${REPO}/busybox:latest
|
||||
WORKDIR /root/
|
||||
|
||||
# Copy minitor in
|
||||
ARG ARCH=amd64
|
||||
COPY ./minitor-go ./minitor
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
|
||||
# Copy scripts
|
||||
COPY ./scripts /app/scripts
|
||||
RUN chmod -R 755 /app/scripts
|
||||
|
||||
# Drop to non-root user
|
||||
USER minitor
|
||||
|
||||
ENTRYPOINT [ "./minitor" ]
|
||||
|
||||
+3
-21
@@ -1,7 +1,7 @@
|
||||
ARG REPO=library
|
||||
FROM golang:1.12-alpine AS builder
|
||||
|
||||
RUN apk add --no-cache git=~2
|
||||
RUN apk add --no-cache git
|
||||
|
||||
RUN mkdir /app
|
||||
WORKDIR /app
|
||||
@@ -16,26 +16,8 @@ ARG VERSION=dev
|
||||
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
|
||||
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
|
||||
|
||||
FROM ${REPO}/alpine:3.10
|
||||
RUN mkdir /app
|
||||
WORKDIR /app/
|
||||
|
||||
# Copy minitor in
|
||||
FROM ${REPO}/busybox:latest
|
||||
WORKDIR /root/
|
||||
COPY --from=builder /app/minitor .
|
||||
|
||||
# Add common checking tools
|
||||
RUN apk --no-cache add bash=~5.0 curl=~7.66 jq=~1.6
|
||||
|
||||
# Add minitor user for running as non-root
|
||||
RUN addgroup -S minitor && adduser -S minitor -G minitor
|
||||
|
||||
# Copy scripts
|
||||
COPY ./scripts /app/scripts
|
||||
RUN chmod -R 755 /app/scripts
|
||||
|
||||
# Drop to non-root user
|
||||
USER minitor
|
||||
|
||||
ENTRYPOINT [ "./minitor" ]
|
||||
|
||||
# vim: set filetype=dockerfile:
|
||||
|
||||
@@ -7,17 +7,17 @@ AND DISTRIBUTION
|
||||
|
||||
1. Definitions.
|
||||
|
||||
|
||||
|
||||
|
||||
"License" shall mean the terms and conditions for use, reproduction, and distribution
|
||||
as defined by Sections 1 through 9 of this document.
|
||||
|
||||
|
||||
|
||||
|
||||
"Licensor" shall mean the copyright owner or entity authorized by the copyright
|
||||
owner that is granting the License.
|
||||
|
||||
|
||||
|
||||
|
||||
"Legal Entity" shall mean the union of the acting entity and all other entities
|
||||
that control, are controlled by, or are under common control with that entity.
|
||||
@@ -26,31 +26,31 @@ or indirect, to cause the direction or management of such entity, whether
|
||||
by contract or otherwise, or (ii) ownership of fifty percent (50%) or more
|
||||
of the outstanding shares, or (iii) beneficial ownership of such entity.
|
||||
|
||||
|
||||
|
||||
|
||||
"You" (or "Your") shall mean an individual or Legal Entity exercising permissions
|
||||
granted by this License.
|
||||
|
||||
|
||||
|
||||
|
||||
"Source" form shall mean the preferred form for making modifications, including
|
||||
but not limited to software source code, documentation source, and configuration
|
||||
files.
|
||||
|
||||
|
||||
|
||||
|
||||
"Object" form shall mean any form resulting from mechanical transformation
|
||||
or translation of a Source form, including but not limited to compiled object
|
||||
code, generated documentation, and conversions to other media types.
|
||||
|
||||
|
||||
|
||||
|
||||
"Work" shall mean the work of authorship, whether in Source or Object form,
|
||||
made available under the License, as indicated by a copyright notice that
|
||||
is included in or attached to the work (an example is provided in the Appendix
|
||||
below).
|
||||
|
||||
|
||||
|
||||
|
||||
"Derivative Works" shall mean any work, whether in Source or Object form,
|
||||
that is based on (or derived from) the Work and for which the editorial revisions,
|
||||
@@ -59,7 +59,7 @@ original work of authorship. For the purposes of this License, Derivative
|
||||
Works shall not include works that remain separable from, or merely link (or
|
||||
bind by name) to the interfaces of, the Work and Derivative Works thereof.
|
||||
|
||||
|
||||
|
||||
|
||||
"Contribution" shall mean any work of authorship, including the original version
|
||||
of the Work and any modifications or additions to that Work or Derivative
|
||||
@@ -74,7 +74,7 @@ for the purpose of discussing and improving the Work, but excluding communicatio
|
||||
that is conspicuously marked or otherwise designated in writing by the copyright
|
||||
owner as "Not a Contribution."
|
||||
|
||||
|
||||
|
||||
|
||||
"Contributor" shall mean Licensor and any individual or Legal Entity on behalf
|
||||
of whom a Contribution has been received by Licensor and subsequently incorporated
|
||||
|
||||
@@ -1,7 +1,6 @@
|
||||
.PHONY: all
|
||||
DOCKER_TAG ?= minitor-go-${USER}
|
||||
|
||||
.PHONY: default
|
||||
.PHONY: test
|
||||
default: test
|
||||
|
||||
.PHONY: build
|
||||
@@ -15,10 +14,6 @@ minitor-go:
|
||||
run: minitor-go build
|
||||
./minitor-go -debug
|
||||
|
||||
.PHONY: run-metrics
|
||||
run-metrics: minitor-go build
|
||||
./minitor-go -debug -metrics
|
||||
|
||||
.PHONY: test
|
||||
test:
|
||||
go test -coverprofile=coverage.out
|
||||
@@ -29,15 +24,6 @@ test:
|
||||
@go tool cover -func=coverage.out | awk -v target=80.0% \
|
||||
'/^total:/ { print "Total coverage: " $$3 " Minimum coverage: " target; if ($$3+0.0 >= target+0.0) print "ok"; else { print "fail"; exit 1; } }'
|
||||
|
||||
# Installs pre-commit hooks
|
||||
.PHONY: install-hooks
|
||||
install-hooks:
|
||||
pre-commit install --install-hooks
|
||||
|
||||
# Checks files for encryption
|
||||
.PHONY: check
|
||||
check:
|
||||
pre-commit run --all-files
|
||||
|
||||
.PHONY: clean
|
||||
clean:
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
A reimplementation of [Minitor](https://git.iamthefij/iamthefij/minitor) in Go
|
||||
|
||||
Minitor is already a minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a large footprint. Thus Go feels like a better fit for the project, longer term.
|
||||
Minitor is already a very minimal monitoring tool. Python 3 was a quick way to get something live, but Python itself comes with a very large footprint. Thus Go feels like a better fit for the project, longer term.
|
||||
|
||||
Initial target is meant to be roughly compatible requiring only minor changes to configuration. Future iterations may diverge to take advantage of Go specific features.
|
||||
|
||||
@@ -30,7 +30,7 @@ monitors:
|
||||
command_shell: echo 'test'
|
||||
```
|
||||
|
||||
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct, `AlertNotice` defined in `alert.go` and the built in Go templating format. Eg.
|
||||
Second, templating for Alert messages has been updated. In the Python version, `str.format(...)` was used with certain keys passed in that could be used to format messages. In the Go version, we use a struct containing Alert info and the built in Go templating format. Eg.
|
||||
|
||||
minitor-py:
|
||||
```yaml
|
||||
@@ -38,7 +38,7 @@ alerts:
|
||||
log_command:
|
||||
command: ['echo', '{monitor_name}']
|
||||
log_shell:
|
||||
command_shell: 'echo {monitor_name}'
|
||||
command_shell: "echo {monitor_name}"
|
||||
```
|
||||
|
||||
minitor-go:
|
||||
@@ -47,7 +47,7 @@ alerts:
|
||||
log_command:
|
||||
command: ['echo', '{{.MonitorName}}']
|
||||
log_shell:
|
||||
command_shell: 'echo {{.MonitorName}}'
|
||||
command_shell: "echo {{.MonitorName}}"
|
||||
```
|
||||
|
||||
Finally, newlines in a shell command don't terminate a particular command. Semicolons must be used and continuations should not.
|
||||
@@ -84,11 +84,10 @@ Pairity:
|
||||
- [x] Run alert commands
|
||||
- [x] Run alert commands in a shell
|
||||
- [x] Allow templating of alert commands
|
||||
- [x] Implement Prometheus client to export metrics
|
||||
- [x] Test coverage
|
||||
- [ ] Integration testing (manual or otherwise)
|
||||
- [ ] Implement Prometheus client to export metrics
|
||||
- [ ] Test coverage
|
||||
|
||||
Improvement (potentially breaking):
|
||||
Improvement:
|
||||
|
||||
- [ ] Implement leveled logging (maybe glog or logrus)
|
||||
- [ ] Consider switching from YAML to TOML
|
||||
@@ -96,4 +95,3 @@ Improvement (potentially breaking):
|
||||
- [ ] Consider dropping `alert_up` and `alert_down` in favor of using Go templates that offer more control of messaging
|
||||
- [ ] Async checking
|
||||
- [ ] Use durations rather than seconds checked in event loop
|
||||
- [ ] Revisit metrics and see if they all make sense
|
||||
|
||||
@@ -3,11 +3,10 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"log"
|
||||
"os/exec"
|
||||
"text/template"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Alert is a config driven mechanism for sending a notice
|
||||
@@ -39,7 +38,9 @@ func (alert Alert) IsValid() bool {
|
||||
|
||||
// BuildTemplates compiles command templates for the Alert
|
||||
func (alert *Alert) BuildTemplates() error {
|
||||
log.Debugf("Building template for alert %s", alert.Name)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Building template for alert %s", alert.Name)
|
||||
}
|
||||
if alert.commandTemplate == nil && alert.Command != nil {
|
||||
alert.commandTemplate = []*template.Template{}
|
||||
for i, cmdPart := range alert.Command {
|
||||
@@ -59,8 +60,8 @@ func (alert *Alert) BuildTemplates() error {
|
||||
}
|
||||
|
||||
// Send will send an alert notice by executing the command template
|
||||
func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||
log.Infof("Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
func (alert Alert) Send(notice AlertNotice) (output_str string, err error) {
|
||||
log.Printf("INFO: Sending alert %s for %s", alert.Name, notice.MonitorName)
|
||||
var cmd *exec.Cmd
|
||||
if alert.commandTemplate != nil {
|
||||
command := []string{}
|
||||
@@ -94,8 +95,10 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
|
||||
|
||||
var output []byte
|
||||
output, err = cmd.CombinedOutput()
|
||||
outputStr = string(output)
|
||||
log.Debugf("Alert output for: %s\n---\n%s\n---", alert.Name, outputStr)
|
||||
output_str = string(output)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Alert output for: %s\n---\n%s\n---", alert.Name, output_str)
|
||||
}
|
||||
|
||||
return outputStr, err
|
||||
return output_str, err
|
||||
}
|
||||
|
||||
+12
-13
@@ -1,9 +1,8 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"testing"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func TestAlertIsValid(t *testing.T) {
|
||||
@@ -23,13 +22,13 @@ func TestAlertIsValid(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Debugf("Testing case %s", c.name)
|
||||
log.Printf("Testing case %s", c.name)
|
||||
actual := c.alert.IsValid()
|
||||
if actual != c.expected {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expected, actual)
|
||||
log.Debugf("Case failed: %s", c.name)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Debugf("-----")
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -72,19 +71,19 @@ func TestAlertSend(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Debugf("Testing case %s", c.name)
|
||||
log.Printf("Testing case %s", c.name)
|
||||
c.alert.BuildTemplates()
|
||||
output, err := c.alert.Send(c.notice)
|
||||
hasErr := (err != nil)
|
||||
if output != c.expectedOutput {
|
||||
t.Errorf("Send(%v output), expected=%v actual=%v", c.name, c.expectedOutput, output)
|
||||
log.Debugf("Case failed: %s", c.name)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("Send(%v err), expected=%v actual=%v", c.name, "Err", err)
|
||||
log.Debugf("Case failed: %s", c.name)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Debugf("-----")
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -95,7 +94,7 @@ func TestAlertSendNoTemplates(t *testing.T) {
|
||||
if err == nil {
|
||||
t.Errorf("Send(no template), expected=%v actual=%v", "Err", output)
|
||||
}
|
||||
log.Debugf("-----")
|
||||
log.Println("-----")
|
||||
}
|
||||
|
||||
func TestAlertBuildTemplate(t *testing.T) {
|
||||
@@ -110,13 +109,13 @@ func TestAlertBuildTemplate(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, c := range cases {
|
||||
log.Debugf("Testing case %s", c.name)
|
||||
log.Printf("Testing case %s", c.name)
|
||||
err := c.alert.BuildTemplates()
|
||||
hasErr := (err != nil)
|
||||
if hasErr != c.expectErr {
|
||||
t.Errorf("IsValid(%v), expected=%t actual=%t", c.name, c.expectErr, err)
|
||||
log.Debugf("Case failed: %s", c.name)
|
||||
log.Printf("Case failed: %s", c.name)
|
||||
}
|
||||
log.Debugf("-----")
|
||||
log.Println("-----")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,11 +2,10 @@ package main
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
"gopkg.in/yaml.v2"
|
||||
"io/ioutil"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
// Config type contains all provided user configuration
|
||||
@@ -22,20 +21,20 @@ func (config Config) IsValid() (isValid bool) {
|
||||
|
||||
// Validate monitors
|
||||
if config.Monitors == nil || len(config.Monitors) == 0 {
|
||||
log.Errorf("Invalid monitor configuration: Must provide at least one monitor")
|
||||
log.Printf("ERROR: Invalid monitor configuration: Must provide at least one monitor")
|
||||
isValid = false
|
||||
}
|
||||
for _, monitor := range config.Monitors {
|
||||
if !monitor.IsValid() {
|
||||
log.Errorf("Invalid monitor configuration: %s", monitor.Name)
|
||||
log.Printf("ERROR: Invalid monitor configuration: %s", monitor.Name)
|
||||
isValid = false
|
||||
}
|
||||
// Check that all Monitor alerts actually exist
|
||||
for _, isUp := range []bool{true, false} {
|
||||
for _, alertName := range monitor.GetAlertNames(isUp) {
|
||||
if _, ok := config.Alerts[alertName]; !ok {
|
||||
log.Errorf(
|
||||
"Invalid monitor configuration: %s. Unknown alert %s",
|
||||
log.Printf(
|
||||
"ERROR: Invalid monitor configuration: %s. Unknown alert %s",
|
||||
monitor.Name, alertName,
|
||||
)
|
||||
isValid = false
|
||||
@@ -46,12 +45,12 @@ func (config Config) IsValid() (isValid bool) {
|
||||
|
||||
// Validate alerts
|
||||
if config.Alerts == nil || len(config.Alerts) == 0 {
|
||||
log.Errorf("Invalid alert configuration: Must provide at least one alert")
|
||||
log.Printf("ERROR: Invalid alert configuration: Must provide at least one alert")
|
||||
isValid = false
|
||||
}
|
||||
for _, alert := range config.Alerts {
|
||||
if !alert.IsValid() {
|
||||
log.Errorf("Invalid alert configuration: %s", alert.Name)
|
||||
log.Printf("ERROR: Invalid alert configuration: %s", alert.Name)
|
||||
isValid = false
|
||||
}
|
||||
}
|
||||
@@ -85,7 +84,7 @@ func LoadConfig(filePath string) (config Config, err error) {
|
||||
return
|
||||
}
|
||||
|
||||
log.Debugf("Config values:\n%v\n", config)
|
||||
log.Printf("config:\n%v\n", config)
|
||||
|
||||
if !config.IsValid() {
|
||||
err = errors.New("Invalid configuration")
|
||||
|
||||
@@ -4,6 +4,5 @@ go 1.12
|
||||
|
||||
require (
|
||||
github.com/prometheus/client_golang v1.2.1
|
||||
github.com/sirupsen/logrus v1.4.2
|
||||
gopkg.in/yaml.v2 v2.2.4
|
||||
)
|
||||
|
||||
@@ -53,7 +53,6 @@ github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsT
|
||||
github.com/prometheus/procfs v0.0.5 h1:3+auTFlqw+ZaQYJARz6ArODtkaIwtvBTx3N2NehQlL8=
|
||||
github.com/prometheus/procfs v0.0.5/go.mod h1:4A/X28fw3Fc593LaREMrKMqOKvUAntwMDaekg4FpcdQ=
|
||||
github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo=
|
||||
github.com/sirupsen/logrus v1.4.2 h1:SPIRibHv4MatM3XXNO2BJeFLZwZ2LvZgfQ5+UNI2im4=
|
||||
github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE=
|
||||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
github.com/stretchr/objx v0.1.1/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
|
||||
@@ -69,7 +68,6 @@ golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5h
|
||||
golang.org/x/sys v0.0.0-20181116152217-5ac8a444bdc5/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47 h1:/XfQ9z7ib8eEJX2hdgFTZJ/ntt0swNk5oYBziWeTCvY=
|
||||
golang.org/x/sys v0.0.0-20191010194322-b09406accb47/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
|
||||
|
||||
@@ -3,18 +3,18 @@ package main
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"log"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var (
|
||||
// LogDebug will control whether debug messages should be logged
|
||||
LogDebug = false
|
||||
|
||||
// ExportMetrics will track whether or not we want to export metrics to prometheus
|
||||
ExportMetrics = false
|
||||
// MetricsPort is the port to expose metrics on
|
||||
MetricsPort = 8080
|
||||
// Metrics contains all active metrics
|
||||
Metrics = NewMetrics()
|
||||
|
||||
// version of minitor being run
|
||||
version = "dev"
|
||||
@@ -23,17 +23,13 @@ var (
|
||||
func checkMonitors(config *Config) error {
|
||||
for _, monitor := range config.Monitors {
|
||||
if monitor.ShouldCheck() {
|
||||
success, alertNotice := monitor.Check()
|
||||
|
||||
hasAlert := alertNotice != nil
|
||||
|
||||
// Track status metrics
|
||||
Metrics.SetMonitorStatus(monitor.Name, success)
|
||||
Metrics.CountCheck(monitor.Name, success, hasAlert)
|
||||
_, alertNotice := monitor.Check()
|
||||
|
||||
// Should probably consider refactoring everything below here
|
||||
if alertNotice != nil {
|
||||
log.Debugf("Recieved an alert notice from %s", alertNotice.MonitorName)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Recieved an alert notice from %s", alertNotice.MonitorName)
|
||||
}
|
||||
alertNames := monitor.GetAlertNames(alertNotice.IsUp)
|
||||
if alertNames == nil {
|
||||
// This should only happen for a recovery alert. AlertDown is validated not empty
|
||||
@@ -59,9 +55,6 @@ func checkMonitors(config *Config) error {
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
// Count alert metrics
|
||||
Metrics.CountAlert(monitor.Name, alert.Name)
|
||||
} else {
|
||||
// This case should never actually happen since we validate against it
|
||||
log.Printf("ERROR: Unknown alert for monitor %s: %s", alertNotice.MonitorName, alertName)
|
||||
@@ -75,18 +68,18 @@ func checkMonitors(config *Config) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func serveMetrics() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
_ = http.ListenAndServe(":8080", nil)
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Get debug flag
|
||||
var debug = flag.Bool("debug", false, "Enables debug logs (default: false)")
|
||||
flag.BoolVar(&LogDebug, "debug", false, "Enables debug logs (default: false)")
|
||||
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
|
||||
var showVersion = flag.Bool("version", false, "Display the version of minitor and exit")
|
||||
flag.Parse()
|
||||
|
||||
// Set debug if flag is set
|
||||
if *debug {
|
||||
log.SetLevel(log.DebugLevel)
|
||||
}
|
||||
|
||||
// Print version if flag is provided
|
||||
if *showVersion {
|
||||
log.Println("Minitor version:", version)
|
||||
@@ -102,7 +95,7 @@ func main() {
|
||||
// Serve metrics exporter, if specified
|
||||
if ExportMetrics {
|
||||
log.Println("INFO: Exporting metrics to Prometheus")
|
||||
go ServeMetrics()
|
||||
go serveMetrics()
|
||||
}
|
||||
|
||||
// Start main loop
|
||||
|
||||
-101
@@ -1,101 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// TODO: Not sure if this is the best way to handle. A global instance for
|
||||
// metrics isn't bad, but it might be nice to curry versions of the metrics
|
||||
// for each monitor. Especially since every monitor has its own. Perhaps
|
||||
// another new function that essentially curries each metric for a given
|
||||
// monitor name would do. This could be run when validating monitors and
|
||||
// initializing alert templates.
|
||||
|
||||
// MinitorMetrics contains all counters and metrics that Minitor will need to access
|
||||
type MinitorMetrics struct {
|
||||
alertCount *prometheus.CounterVec
|
||||
checkCount *prometheus.CounterVec
|
||||
monitorStatus *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// NewMetrics creates and initializes all metrics
|
||||
func NewMetrics() *MinitorMetrics {
|
||||
// Initialize all metrics
|
||||
metrics := &MinitorMetrics{
|
||||
alertCount: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "minitor_alert_total",
|
||||
Help: "Number of Minitor alerts",
|
||||
},
|
||||
[]string{"alert", "monitor"},
|
||||
),
|
||||
checkCount: prometheus.NewCounterVec(
|
||||
prometheus.CounterOpts{
|
||||
Name: "minitor_check_total",
|
||||
Help: "Number of Minitor checks",
|
||||
},
|
||||
[]string{"monitor", "status", "is_alert"},
|
||||
),
|
||||
monitorStatus: prometheus.NewGaugeVec(
|
||||
prometheus.GaugeOpts{
|
||||
Name: "minitor_monitor_up_count",
|
||||
Help: "Status of currently responsive monitors",
|
||||
},
|
||||
[]string{"monitor"},
|
||||
),
|
||||
}
|
||||
|
||||
// Register newly created metrics
|
||||
prometheus.MustRegister(metrics.alertCount)
|
||||
prometheus.MustRegister(metrics.checkCount)
|
||||
prometheus.MustRegister(metrics.monitorStatus)
|
||||
|
||||
return metrics
|
||||
}
|
||||
|
||||
// SetMonitorStatus sets the current status of Monitor
|
||||
func (metrics *MinitorMetrics) SetMonitorStatus(monitor string, isUp bool) {
|
||||
val := 0.0
|
||||
if isUp {
|
||||
val = 1.0
|
||||
}
|
||||
metrics.monitorStatus.With(prometheus.Labels{"monitor": monitor}).Set(val)
|
||||
}
|
||||
|
||||
// CountCheck counts the result of a particular Monitor check
|
||||
func (metrics *MinitorMetrics) CountCheck(monitor string, isSuccess bool, isAlert bool) {
|
||||
status := "failure"
|
||||
if isSuccess {
|
||||
status = "success"
|
||||
}
|
||||
|
||||
alertVal := "false"
|
||||
if isAlert {
|
||||
alertVal = "true"
|
||||
}
|
||||
|
||||
metrics.checkCount.With(
|
||||
prometheus.Labels{"monitor": monitor, "status": status, "is_alert": alertVal},
|
||||
).Inc()
|
||||
}
|
||||
|
||||
// CountAlert counts an alert
|
||||
func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
|
||||
metrics.alertCount.With(
|
||||
prometheus.Labels{
|
||||
"alert": alert,
|
||||
"monitor": monitor,
|
||||
},
|
||||
).Inc()
|
||||
}
|
||||
|
||||
// ServeMetrics starts an http server with a Prometheus metrics handler
|
||||
func ServeMetrics() {
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
host := fmt.Sprintf(":%d", MetricsPort)
|
||||
_ = http.ListenAndServe(host, nil)
|
||||
}
|
||||
+24
-19
@@ -1,11 +1,10 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"log"
|
||||
"math"
|
||||
"os/exec"
|
||||
"time"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Monitor represents a particular periodic check of a command
|
||||
@@ -71,18 +70,20 @@ func (monitor *Monitor) Check() (bool, *AlertNotice) {
|
||||
alertNotice = monitor.failure()
|
||||
}
|
||||
|
||||
log.Debugf("Command output: %s", monitor.lastOutput)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command output: %s", monitor.lastOutput)
|
||||
}
|
||||
if err != nil {
|
||||
log.Debugf("Command result: %v", err)
|
||||
if LogDebug {
|
||||
log.Printf("DEBUG: Command result: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.WithFields(log.Fields{
|
||||
"monitor": monitor.Name,
|
||||
"success": isSuccess,
|
||||
"alert": alertNotice != nil,
|
||||
}).Infof(
|
||||
"%s checked",
|
||||
log.Printf(
|
||||
"INFO: %s success=%t, alert=%t",
|
||||
monitor.Name,
|
||||
isSuccess,
|
||||
alertNotice != nil,
|
||||
)
|
||||
|
||||
return isSuccess, alertNotice
|
||||
@@ -108,13 +109,15 @@ func (monitor *Monitor) failure() (notice *AlertNotice) {
|
||||
monitor.failureCount++
|
||||
// If we haven't hit the minimum failures, we can exit
|
||||
if monitor.failureCount < monitor.getAlertAfter() {
|
||||
log.Debugf(
|
||||
"%s failed but did not hit minimum failures. "+
|
||||
"Count: %v alert after: %v",
|
||||
monitor.Name,
|
||||
monitor.failureCount,
|
||||
monitor.getAlertAfter(),
|
||||
)
|
||||
if LogDebug {
|
||||
log.Printf(
|
||||
"DEBUG: %s failed but did not hit minimum failures. "+
|
||||
"Count: %v alert after: %v",
|
||||
monitor.Name,
|
||||
monitor.failureCount,
|
||||
monitor.getAlertAfter(),
|
||||
)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -152,16 +155,18 @@ func (monitor Monitor) getAlertAfter() int16 {
|
||||
// Zero is one!
|
||||
if monitor.AlertAfter == 0 {
|
||||
return 1
|
||||
} else {
|
||||
return monitor.AlertAfter
|
||||
}
|
||||
return monitor.AlertAfter
|
||||
}
|
||||
|
||||
// GetAlertNames gives a list of alert names for a given monitor status
|
||||
func (monitor Monitor) GetAlertNames(up bool) []string {
|
||||
if up {
|
||||
return monitor.AlertUp
|
||||
} else {
|
||||
return monitor.AlertDown
|
||||
}
|
||||
return monitor.AlertDown
|
||||
}
|
||||
|
||||
func (monitor Monitor) createAlertNotice(isUp bool) *AlertNotice {
|
||||
|
||||
+12
-24
@@ -1,41 +1,29 @@
|
||||
---
|
||||
check_interval: 5
|
||||
check_interval: 30
|
||||
|
||||
monitors:
|
||||
- name: Fake Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://minitor.mon']
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 10 # Must be at minimum the global `check_interval`
|
||||
- name: My Website
|
||||
command: [ 'curl', '-s', '-o', '/dev/null', 'https://minitor.mon' ]
|
||||
alert_down: [ log, mailgun_down, sms_down ]
|
||||
alert_up: [ log, email_up ]
|
||||
check_interval: 30 # Must be at minimum the global `check_interval`
|
||||
alert_after: 3
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
- name: Real Website
|
||||
command: ['curl', '-s', '-o', '/dev/null', 'https://google.com']
|
||||
alert_down: [log_down, mailgun_down, sms_down]
|
||||
alert_up: [log_up, email_up]
|
||||
check_interval: 5
|
||||
alert_after: 3
|
||||
alert_every: -1
|
||||
alert_every: -1 # Defaults to -1 for exponential backoff. 0 to disable repeating
|
||||
|
||||
alerts:
|
||||
log_down:
|
||||
command: ["echo", "Minitor failure for {{.MonitorName}}"]
|
||||
log_up:
|
||||
command: ["echo", "Minitor recovery for {{.MonitorName}}"]
|
||||
email_up:
|
||||
command: [sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!"]
|
||||
command: [ sendmail, "me@minitor.mon", "Recovered: {monitor_name}", "We're back!" ]
|
||||
mailgun_down:
|
||||
command_shell: >
|
||||
command: >
|
||||
curl -s -X POST
|
||||
-F subject="Alert! {{.MonitorName}} failed"
|
||||
-F subject="Alert! {monitor_name} failed"
|
||||
-F from="Minitor <minitor@minitor.mon>"
|
||||
-F to=me@minitor.mon
|
||||
-F text="Our monitor failed"
|
||||
https://api.mailgun.net/v3/minitor.mon/messages
|
||||
-u "api:${MAILGUN_API_KEY}"
|
||||
sms_down:
|
||||
command_shell: >
|
||||
curl -s -X POST -F "Body=Failure! {{.MonitorName}} has failed"
|
||||
command: >
|
||||
curl -s -X POST -F "Body=Failure! {monitor_name} has failed"
|
||||
-F "From=${AVAILABLE_NUMBER}" -F "To=${MY_PHONE}"
|
||||
"https://api.twilio.com/2010-04-01/Accounts/${ACCOUNT_SID}/Messages"
|
||||
-u "${ACCOUNT_SID}:${AUTH_TOKEN}"
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
# Minitor Scripts
|
||||
|
||||
A collection of some handy scripts to use with Minitor
|
||||
|
||||
These are not included with the Python package, but they are included in the Docker image in `/app/scripts`.
|
||||
@@ -1,51 +0,0 @@
|
||||
#!/bin/bash
set -e

#################
# docker_check.sh
#
# Checks the most recent state exit code of a Docker container
#
# Usage: docker_check.sh container_name
#################

# Docker host will default to a socket
# To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:-socket}"
# Name of the container to check; empty when no argument was given
container_name="${1:-}"
|
||||
|
||||
# Curls Docker either using a socket or URL
# Globals:   DOCKER_HOST (read) - "socket", or a host used as http://HOST/path
# Arguments: $1 - API path, without a leading slash
# Outputs:   curl's stdout; curl's stderr is discarded
function curl_docker {
  local path="$1"
  if [[ "$DOCKER_HOST" == "socket" ]]; then
    curl --unix-socket /var/run/docker.sock "http://localhost/$path" 2>/dev/null
  else
    curl "http://${DOCKER_HOST}/$path" 2>/dev/null
  fi
}
|
||||
|
||||
# Returns container ID for a given container name
# Arguments: $1 - container name, without the leading slash
# Outputs:   the Id of each listed container with a matching name, one per line
function get_container_id {
  local container_name="$1"
  # Hand the name to jq with --arg instead of splicing it into the filter, so
  # quotes or backslashes in the name cannot change the jq program
  curl_docker 'containers/json?all=1' \
    | jq -r --arg name "/${container_name}" \
      '.[] | {Id, Name: .Names[]} | select(.Name == $name) | .Id'
}
|
||||
|
||||
# Returns container JSON
# Arguments: $1 - container ID
# Outputs:   whatever curl_docker prints for containers/<id>/json
function inspect_container {
  local container_id="$1"
  curl_docker "containers/${container_id}/json"
}
|
||||
|
||||
# Require a container name
if [[ -z "$container_name" ]]; then
  echo "Usage: $0 container_name"
  echo "Will exit with the last status code of continer with provided name"
  exit 1
fi

# Quote the name so it reaches get_container_id as a single argument
container_id=$(get_container_id "$container_name")
if [[ -z "$container_id" ]]; then
  echo "ERROR: Could not find container with name: $container_name"
  exit 1
fi
exit_code=$(inspect_container "$container_id" | jq -r .State.ExitCode)

# jq prints "null" for a missing field, which `exit` cannot take as a status
if ! [[ "$exit_code" =~ ^[0-9]+$ ]]; then
  echo "ERROR: Could not read exit code for container: $container_name"
  exit 1
fi

# Propagate the container's last exit code as this script's status
exit "$exit_code"
|
||||
@@ -1,61 +0,0 @@
|
||||
#!/bin/bash
set -e

#################
# docker_healthcheck.sh
#
# Returns the results of a Docker Healthcheck for a container
#
# Usage: docker_healthcheck.sh container_name
#################

# Docker host will default to a socket
# To override, export DOCKER_HOST to a new hostname
DOCKER_HOST="${DOCKER_HOST:-socket}"
# Name of the container to check; empty when no argument was given
container_name="${1:-}"
|
||||
|
||||
# Curls Docker either using a socket or URL
# Globals:   DOCKER_HOST (read) - "socket", or a host used as http://HOST/path
# Arguments: $1 - API path, without a leading slash
# Outputs:   curl's stdout; curl's stderr is discarded
function curl_docker {
  local path="$1"
  if [[ "$DOCKER_HOST" == "socket" ]]; then
    curl --unix-socket /var/run/docker.sock "http://localhost/$path" 2>/dev/null
  else
    curl "http://${DOCKER_HOST}/$path" 2>/dev/null
  fi
}
|
||||
|
||||
# Returns container ID for a given container name
# Arguments: $1 - container name, without the leading slash
# Outputs:   the Id of each listed container with a matching name, one per line
function get_container_id {
  local container_name="$1"
  # Hand the name to jq with --arg instead of splicing it into the filter, so
  # quotes or backslashes in the name cannot change the jq program
  curl_docker 'containers/json?all=1' \
    | jq -r --arg name "/${container_name}" \
      '.[] | {Id, Name: .Names[]} | select(.Name == $name) | .Id'
}
|
||||
|
||||
# Returns container JSON
# Arguments: $1 - container ID
# Outputs:   whatever curl_docker prints for containers/<id>/json
function inspect_container {
  local container_id="$1"
  curl_docker "containers/${container_id}/json"
}
|
||||
|
||||
# Require a container name
if [[ -z "$container_name" ]]; then
  echo "Usage: $0 container_name"
  echo "Will return results of healthcheck for continer with provided name"
  exit 1
fi

container_id=$(get_container_id "$container_name")
if [[ -z "$container_id" ]]; then
  echo "ERROR: Could not find container with name: $container_name"
  exit 1
fi
health=$(inspect_container "$container_id" | jq -r '.State.Health.Status')

# "null" (no health status in the JSON), "starting" and "healthy" exit 0;
# any other status is printed and exits 1
case "$health" in
  null)
    echo "No healthcheck results"
    ;;
  starting|healthy)
    echo "Status: '$health'"
    ;;
  *)
    echo "Status: '$health'"
    exit 1
    ;;
esac
|
||||
@@ -6,3 +6,4 @@ monitors:
|
||||
alert_down: [ 'alert_down', 'log_shell', 'log_command' ]
|
||||
# alert_every: -1
|
||||
alert_every: 0
|
||||
|
||||
|
||||
Reference in New Issue
Block a user