Compare commits

..

1 Commits

Author SHA1 Message Date
Ian Fijolek
0a36da79d6 Add health check and self reporting of health
This avoids panicing and instead provides an HTTP endpoint to report health
2024-04-03 11:23:26 -07:00
7 changed files with 267 additions and 48 deletions
+6 -6
View File
@@ -4,7 +4,7 @@ go 1.20
require ( require (
git.iamthefij.com/iamthefij/slog v1.3.0 git.iamthefij.com/iamthefij/slog v1.3.0
github.com/prometheus/client_golang v1.19.0 github.com/prometheus/client_golang v1.15.0
gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v2 v2.4.0
) )
@@ -14,10 +14,10 @@ require (
github.com/golang/protobuf v1.5.3 // indirect github.com/golang/protobuf v1.5.3 // indirect
github.com/kr/text v0.2.0 // indirect github.com/kr/text v0.2.0 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect github.com/matttproud/golang_protobuf_extensions v1.0.4 // indirect
github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/client_model v0.3.0 // indirect
github.com/prometheus/common v0.48.0 // indirect github.com/prometheus/common v0.42.0 // indirect
github.com/prometheus/procfs v0.12.0 // indirect github.com/prometheus/procfs v0.9.0 // indirect
github.com/rogpeppe/go-internal v1.10.0 // indirect github.com/rogpeppe/go-internal v1.10.0 // indirect
golang.org/x/sys v0.16.0 // indirect golang.org/x/sys v0.6.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect google.golang.org/protobuf v1.30.0 // indirect
) )
-14
View File
@@ -182,16 +182,12 @@ github.com/prometheus/client_golang v1.12.1/go.mod h1:3Z9XVyYiZYEO+YQWt3RD2R3jrb
github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y= github.com/prometheus/client_golang v1.14.0/go.mod h1:8vpkKitgIVNcqrRBWh1C4TIUQgYNtG/XQE4E/Zae36Y=
github.com/prometheus/client_golang v1.15.0 h1:5fCgGYogn0hFdhyhLbw7hEsWxufKtY9klyvdNfFlFhM= github.com/prometheus/client_golang v1.15.0 h1:5fCgGYogn0hFdhyhLbw7hEsWxufKtY9klyvdNfFlFhM=
github.com/prometheus/client_golang v1.15.0/go.mod h1:e9yaBhRPU2pPNsZwE+JdQl0KEt1N9XgF6zxWmaC0xOk= github.com/prometheus/client_golang v1.15.0/go.mod h1:e9yaBhRPU2pPNsZwE+JdQl0KEt1N9XgF6zxWmaC0xOk=
github.com/prometheus/client_golang v1.19.0 h1:ygXvpU1AoN1MhdzckN+PyD9QJOSD4x7kmXYlnfbA6JU=
github.com/prometheus/client_golang v1.19.0/go.mod h1:ZRM9uEAypZakd+q/x7+gmsvXdURP+DABIEIjnmDdp+k=
github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo= github.com/prometheus/client_model v0.0.0-20180712105110-5c3871d89910/go.mod h1:MbSGuTsp3dbXC40dX6PRTWyKYBIrTGTE9sqQNg2J8bo=
github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190129233127-fd36f4220a90/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.2.0/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA=
github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4= github.com/prometheus/client_model v0.3.0 h1:UBgGFHqYdG/TPFD1B1ogZywDqEkwp3fBMvqdiQ7Xew4=
github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w= github.com/prometheus/client_model v0.3.0/go.mod h1:LDGWKZIo7rky3hgvBe+caln+Dr3dPggB5dvjtD7w9+w=
github.com/prometheus/client_model v0.5.0 h1:VQw1hfvPvk3Uv6Qf29VrPF32JB6rtbgI6cYPYQjL0Qw=
github.com/prometheus/client_model v0.5.0/go.mod h1:dTiFglRmd66nLR9Pv9f0mZi7B7fk5Pm3gvsjB5tr+kI=
github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4= github.com/prometheus/common v0.4.1/go.mod h1:TNfzLD0ON7rHzMJeJkieUDPYmFC7Snx/y86RQel1bk4=
github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo= github.com/prometheus/common v0.10.0/go.mod h1:Tlit/dnDKsSWFlCLTWaA1cyBgKHSMdTB80sz/V91rCo=
github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc= github.com/prometheus/common v0.26.0/go.mod h1:M7rCNAaPfAosfx8veZJCuw84e35h3Cfd9VFqTh1DIvc=
@@ -199,8 +195,6 @@ github.com/prometheus/common v0.32.1/go.mod h1:vu+V0TpY+O6vW9J44gczi3Ap/oXXR10b+
github.com/prometheus/common v0.37.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA= github.com/prometheus/common v0.37.0/go.mod h1:phzohg0JFMnBEFGxTDbfu3QyL5GI8gTQJFhYO5B3mfA=
github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM= github.com/prometheus/common v0.42.0 h1:EKsfXEYo4JpWMHH5cg+KOUWeuJSov1Id8zGR8eeI1YM=
github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc= github.com/prometheus/common v0.42.0/go.mod h1:xBwqVerjNdUDjgODMpudtOMwlOwf2SaTr1yjz4b7Zbc=
github.com/prometheus/common v0.48.0 h1:QO8U2CdOzSn1BBsmXJXduaaW+dY/5QLjfB8svtSzKKE=
github.com/prometheus/common v0.48.0/go.mod h1:0/KsvlIEfPQCQ5I2iNSAWKPZziNCvRs5EC6ILDTlAPc=
github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk= github.com/prometheus/procfs v0.0.0-20181005140218-185b4288413d/go.mod h1:c3At6R/oaqEKCNdg8wHV1ftS6bRYblBhIjjI8uT2IGk=
github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA= github.com/prometheus/procfs v0.0.2/go.mod h1:TjEm7ze935MbeOT/UhFTIMYKhuLP4wbCsTZCD3I8kEA=
github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU= github.com/prometheus/procfs v0.1.3/go.mod h1:lV6e/gmhEcM9IjHGsFOCxxuZ+z1YqCvr4OA4YeYWdaU=
@@ -209,8 +203,6 @@ github.com/prometheus/procfs v0.7.3/go.mod h1:cz+aTbrPOrUb4q7XlbU9ygM+/jj0fzG6c1
github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4= github.com/prometheus/procfs v0.8.0/go.mod h1:z7EfXMXOkbkqb9IINtpCn86r/to3BnA0uaxHdg830/4=
github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI= github.com/prometheus/procfs v0.9.0 h1:wzCHvIvM5SxWqYvwgVL7yJY8Lz3PKn49KQtpgMYJfhI=
github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY= github.com/prometheus/procfs v0.9.0/go.mod h1:+pB4zwohETzFnmlpe6yd2lSc+0/46IYZRB/chUwxUZY=
github.com/prometheus/procfs v0.12.0 h1:jluTpSng7V9hY0O2R9DzzJHYb2xULk9VTR1V1R/k6Bo=
github.com/prometheus/procfs v0.12.0/go.mod h1:pcuDEFsWDnvcgNzo4EEweacyhjeA9Zk3cnaOZAZEfOo=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4= github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs= github.com/rogpeppe/go-internal v1.9.0/go.mod h1:WtVeX8xhTBvf0smdhujwtBcq4Qrzq/fJaraNFVN+nFs=
github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ=
@@ -373,8 +365,6 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ= golang.org/x/sys v0.6.0 h1:MVltZSvRTcU2ljQOhs94SXPftV6DCNnZViHeQps87pQ=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.16.0 h1:xWw16ngr6ZMtmxDyKyIgsE93KNKz5HKmMa3b8ALHidU=
golang.org/x/sys v0.16.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
@@ -514,10 +504,6 @@ google.golang.org/protobuf v1.28.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw
google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.28.1/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng= google.golang.org/protobuf v1.30.0 h1:kPPoIgf3TsEvrm0PFe15JQ+570QVxYzEvvHqChK+cng=
google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I= google.golang.org/protobuf v1.30.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
google.golang.org/protobuf v1.32.0 h1:pPC6BG5ex8PDFnkbrGU3EixyhKcQ2aDuBS36lqK/C7I=
google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
google.golang.org/protobuf v1.33.0 h1:uNO2rsAINq/JlFpSdYEKIZ0uKD/R9cpdv0T+yoGwGmI=
google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos=
gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+72
View File
@@ -0,0 +1,72 @@
package main
import (
"fmt"
"io"
"net/http"
"strings"
)
type HealthCheckHandler struct {
isMinitorHealthy bool
monitors []*Monitor
}
func NewHealthCheckHandler(monitors []*Monitor) *HealthCheckHandler {
return &HealthCheckHandler{
false,
monitors,
}
}
func (hch *HealthCheckHandler) MinitorHealthy(healthy bool) {
hch.isMinitorHealthy = healthy
}
func (hch HealthCheckHandler) MinitorHealthCheck() (bool, string) {
if hch.isMinitorHealthy {
return true, "OK"
} else {
return false, "UNHEALTHY"
}
}
func (hch HealthCheckHandler) MonitorsHealthCheck() (bool, string) {
downMonitors := []string{}
for _, monitor := range hch.monitors {
if !monitor.IsUp() {
downMonitors = append(downMonitors, monitor.Name)
}
}
if len(downMonitors) == 0 {
return true, "OK"
} else {
return false, fmt.Sprintf("UNHEALTHY: The following monitors are unhealthy: %s", strings.Join(downMonitors, ", "))
}
}
func (hch HealthCheckHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
var healthy bool
var body string
if monitors := r.URL.Query().Get("monitors"); monitors != "" {
healthy, body = hch.MonitorsHealthCheck()
} else {
healthy, body = hch.MinitorHealthCheck()
}
if healthy {
w.WriteHeader(http.StatusOK)
} else {
w.WriteHeader(http.StatusServiceUnavailable)
}
_, _ = io.WriteString(w, body)
}
func HandleHealthCheck() {
http.Handle("/metrics", HealthChecks)
}
+79
View File
@@ -0,0 +1,79 @@
package main
import (
"testing"
)
func TestNewHealthCheck(t *testing.T) {
monitors := []*Monitor{
{Name: "Test Monitor"},
}
hc := NewHealthCheckHandler(monitors)
monitors[0].alertCount++
if healthy, _ := hc.MinitorHealthCheck(); healthy {
t.Errorf("Initial hc state should be unhealthy until some successful alert is sent")
}
if healthy, _ := hc.MonitorsHealthCheck(); healthy {
t.Errorf("Faking an alert on the monitor pointer should make this unhealthy")
}
}
func TestMinitorHealthCheck(t *testing.T) {
monitors := []*Monitor{
{Name: "Test Monitor"},
}
hc := NewHealthCheckHandler(monitors)
t.Run("MinitorHealthCheck(healthy)", func(t *testing.T) {
hc.MinitorHealthy(true)
healthy, body := hc.MinitorHealthCheck()
if !healthy {
t.Errorf("Expected healthy check")
}
if body != "OK" {
t.Errorf("Expected OK response")
}
})
t.Run("MinitorHealthCheck(unhealthy)", func(t *testing.T) {
hc.MinitorHealthy(false)
healthy, body := hc.MinitorHealthCheck()
if healthy {
t.Errorf("Expected healthy check")
}
if body != "UNHEALTHY" {
t.Errorf("Expected UNHEALTHY response")
}
})
}
func TestMonitorsHealthCheck(t *testing.T) {
monitors := []*Monitor{
{Name: "Test Monitor"},
}
hc := NewHealthCheckHandler(monitors)
t.Run("MonitorsHealthCheck(healthy)", func(t *testing.T) {
healthy, body := hc.MonitorsHealthCheck()
if !healthy {
t.Errorf("Expected healthy check")
}
if body != "OK" {
t.Errorf("Expected OK response")
}
})
t.Run("MonitorsHealthCheck(unhealthy)", func(t *testing.T) {
monitors[0].alertCount++
healthy, body := hc.MonitorsHealthCheck()
if healthy {
t.Errorf("Expected healthy check")
}
if body != "UNHEALTHY: The following monitors are unhealthy: Test Monitor" {
t.Errorf("Expected UNHEALTHY response")
}
})
}
+45 -4
View File
@@ -4,6 +4,7 @@ import (
"errors" "errors"
"flag" "flag"
"fmt" "fmt"
"net/http"
"time" "time"
"git.iamthefij.com/iamthefij/slog" "git.iamthefij.com/iamthefij/slog"
@@ -16,6 +17,10 @@ var (
MetricsPort = 8080 MetricsPort = 8080
// Metrics contains all active metrics // Metrics contains all active metrics
Metrics = NewMetrics() Metrics = NewMetrics()
// Self monitor rather than panicing
SelfMonitor = false
// HealthChecks contains health check values
HealthChecks *HealthCheckHandler = nil
// PyCompat enables support for legacy Python templates // PyCompat enables support for legacy Python templates
PyCompat = false PyCompat = false
@@ -51,7 +56,13 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
output, output,
) )
if SelfMonitor {
Metrics.SetMonitorStatus(fmt.Sprintf("Alert %s", alertName), false)
}
return err return err
} else {
Metrics.SetMonitorStatus(fmt.Sprintf("Alert %s", alertName), true)
} }
// Count alert metrics // Count alert metrics
@@ -69,6 +80,8 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
func checkMonitors(config *Config) error { func checkMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions // TODO: Run this in goroutines and capture exceptions
healthy := true
for _, monitor := range config.Monitors { for _, monitor := range config.Monitors {
if monitor.ShouldCheck() { if monitor.ShouldCheck() {
success, alertNotice := monitor.Check() success, alertNotice := monitor.Check()
@@ -80,17 +93,32 @@ func checkMonitors(config *Config) error {
if alertNotice != nil { if alertNotice != nil {
err := sendAlerts(config, monitor, alertNotice) err := sendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, exit early and bubble it up // If there was an error in sending an alert, mark as unhealthy or bubble up
if err != nil { if err != nil {
return err if SelfMonitor {
healthy = false
} else {
return err
}
} }
} }
} }
} }
if HealthChecks != nil {
HealthChecks.MinitorHealthy(healthy)
}
return nil return nil
} }
// ServeMetricsAndHealth starts the default http server
func ServeMetricsAndHealth() {
host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil)
}
func main() { func main() {
showVersion := flag.Bool("version", false, "Display the version of minitor and exit") showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)") configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
@@ -98,7 +126,9 @@ func main() {
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)") flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)") flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)") flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)") flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics and healthchecks should be exported on, if enabled. (default: 8080)")
flag.BoolVar(&SelfMonitor, "self-monitor", false, "Enables self-monitoring. Export metrics rather than panic when alerts fail. (default: false)")
flag.Parse() flag.Parse()
// Print version if flag is provided // Print version if flag is provided
@@ -115,8 +145,19 @@ func main() {
// Serve metrics exporter, if specified // Serve metrics exporter, if specified
if ExportMetrics { if ExportMetrics {
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort) slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
HandleMetrics()
}
go ServeMetrics() if SelfMonitor {
slog.Infof("Starting healthcheck endpoint on port %d", MetricsPort)
HealthChecks = NewHealthCheckHandler(config.Monitors)
HandleHealthCheck()
}
if ExportMetrics || SelfMonitor {
go ServeMetricsAndHealth()
} }
// Start main loop // Start main loop
+63 -17
View File
@@ -4,9 +4,10 @@ import "testing"
func TestCheckMonitors(t *testing.T) { func TestCheckMonitors(t *testing.T) {
cases := []struct { cases := []struct {
config Config config Config
expectErr bool expectErr bool
name string name string
selfMonitor bool
}{ }{
{ {
config: Config{}, config: Config{},
@@ -22,8 +23,9 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: false, expectErr: false,
name: "Monitor success, no alerts", name: "Monitor success, no alerts",
selfMonitor: false,
}, },
{ {
config: Config{ config: Config{
@@ -35,8 +37,9 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: false, expectErr: false,
name: "Monitor failure, no alerts", name: "Monitor failure, no alerts",
selfMonitor: false,
}, },
{ {
config: Config{ config: Config{
@@ -48,8 +51,9 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: false, expectErr: false,
name: "Monitor recovery, no alerts", name: "Monitor recovery, no alerts",
selfMonitor: false,
}, },
{ {
config: Config{ config: Config{
@@ -62,8 +66,9 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: true, expectErr: true,
name: "Monitor failure, unknown alerts", name: "Monitor failure, unknown alerts",
selfMonitor: false,
}, },
{ {
config: Config{ config: Config{
@@ -76,8 +81,24 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: true, expectErr: true,
name: "Monitor recovery, unknown alerts", name: "Monitor recovery, unknown alerts",
selfMonitor: false,
},
{
config: Config{
Monitors: []*Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
alertCount: 1,
},
},
},
expectErr: false,
name: "Monitor recovery, unknown alerts, with Health Check",
selfMonitor: true,
}, },
{ {
config: Config{ config: Config{
@@ -95,8 +116,9 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: false, expectErr: false,
name: "Monitor failure, successful alert", name: "Monitor failure, successful alert",
selfMonitor: false,
}, },
{ {
config: Config{ config: Config{
@@ -115,12 +137,36 @@ func TestCheckMonitors(t *testing.T) {
}, },
}, },
}, },
expectErr: true, expectErr: true,
name: "Monitor failure, bad alert", name: "Monitor failure, bad alert",
selfMonitor: false,
},
{
config: Config{
Monitors: []*Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"},
AlertAfter: 1,
},
},
Alerts: map[string]*Alert{
"bad": {
Name: "bad",
Command: CommandOrShell{Command: []string{"false"}},
},
},
},
expectErr: false,
name: "Monitor failure, bad alert, with Health Check",
selfMonitor: true,
}, },
} }
for _, c := range cases { for _, c := range cases {
SelfMonitor = c.selfMonitor
err := c.config.Init() err := c.config.Init()
if err != nil { if err != nil {
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err) t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
+2 -7
View File
@@ -1,7 +1,6 @@
package main package main
import ( import (
"fmt"
"net/http" "net/http"
"github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus"
@@ -107,11 +106,7 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
).Inc() ).Inc()
} }
// ServeMetrics starts an http server with a Prometheus metrics handler // HandleMetrics add Prometheus metrics handler to default http server
func ServeMetrics() { func HandleMetrics() {
http.Handle("/metrics", promhttp.Handler()) http.Handle("/metrics", promhttp.Handler())
host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil)
} }