Compare commits

..

4 Commits

Author SHA1 Message Date
Ian Fijolek
cea16606ba Fix tests after removing legacy compatibility 2023-04-20 14:32:13 -07:00
Ian Fijolek
a03f430d0e Remove 'SecondsOrDuration' for check_interval
Now requires an explicit duration unit. Eg. 30s
2023-04-19 15:31:12 -07:00
Ian Fijolek
f3f7c215a7 Breaking: Remove python compat flag 2023-04-19 15:27:33 -07:00
Ian Fijolek
c75302bdb8 Add dig and update system package versions
Includes bump to alpine 3.17
2023-04-19 15:23:34 -07:00
17 changed files with 58 additions and 424 deletions
+2 -2
View File
@@ -4,7 +4,7 @@ name: test
steps:
- name: test
image: golang:1.20
image: golang:1.17
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
@@ -30,7 +30,7 @@ trigger:
steps:
- name: build all binaries
image: golang:1.20
image: golang:1.17
environment:
VERSION: ${DRONE_TAG:-${DRONE_COMMIT}}
commands:
+2 -2
View File
@@ -1,11 +1,11 @@
ARG REPO=library
FROM ${REPO}/alpine:3.18
FROM ${REPO}/alpine:3.17
RUN mkdir /app
WORKDIR /app/
# Add common checking tools
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2023c
RUN apk --no-cache add bash=~5 curl=~8 jq=~1.6 bind-tools~=9
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+3 -3
View File
@@ -1,5 +1,5 @@
ARG REPO=library
FROM golang:1.20 AS builder
FROM golang:1.17 AS builder
RUN mkdir /app
WORKDIR /app
@@ -14,7 +14,7 @@ ARG VERSION=dev
ENV CGO_ENABLED=0 GOOS=linux GOARCH=${ARCH}
RUN go build -ldflags "-X main.version=${VERSION}" -a -installsuffix nocgo -o minitor .
FROM ${REPO}/alpine:3.18
FROM ${REPO}/alpine:3.17
RUN mkdir /app
WORKDIR /app/
@@ -22,7 +22,7 @@ WORKDIR /app/
COPY --from=builder /app/minitor .
# Add common checking tools
RUN apk --no-cache add bash=~5 curl=~8 jq=~1 bind-tools=~9 tzdata~=2023c
RUN apk --no-cache add bash=~5 curl=~8 jq=~1.6 bind-tools~=9
# Add minitor user for running as non-root
RUN addgroup -S minitor && adduser -S minitor -G minitor
+2 -22
View File
@@ -46,8 +46,6 @@ docker run -v $PWD/config.yml:/app/config.yml iamthefij/minitor-go:latest
Images are provided for `amd64`, `arm`, and `arm64` architectures.
Timezone configuration for the container is set by passing the `TZ` env variable. Eg. `TZ=America/Los_Angeles`.
## Configuring
In this repo, you can explore the `sample-config.yml` file for an example, but the general structure is as follows. It should be noted that environment variable interpolation happens on load of the YAML file.
@@ -96,28 +94,10 @@ Also, when alerts are executed, they will be passed through Go's format function
|`{{.AlertCount}}`|Number of times this monitor has alerted|
|`{{.FailureCount}}`|The total number of sequential failed checks for this monitor|
|`{{.LastCheckOutput}}`|The last returned value from the check command to either stderr or stdout|
|`{{.LastSuccess}}`|The datetime of the last successful check as a go Time struct|
|`{{.LastSuccess}}`|The ISO datetime of the last successful check|
|`{{.MonitorName}}`|The name of the monitor that failed and triggered the alert|
|`{{.IsUp}}`|Indicates if the monitor that is alerting is up or not. Can be used in a conditional message template|
To provide flexible formatting, the following non-standard functions are available in templates:
|func|description|
|---|---|
|`ANSIC <Time>`|Formats provided time in ANSIC format|
|`UnixDate <Time>`|Formats provided time in UnixDate format|
|`RubyDate <Time>`|Formats provided time in RubyDate format|
|`RFC822Z <Time>`|Formats provided time in RFC822Z format|
|`RFC850 <Time>`|Formats provided time in RFC850 format|
|`RFC1123 <Time>`|Formats provided time in RFC1123 format|
|`RFC1123Z <Time>`|Formats provided time in RFC1123Z format|
|`RFC3339 <Time>`|Formats provided time in RFC3339 format|
|`RFC3339Nano <Time>`|Formats provided time in RFC3339Nano format|
|`FormatTime <Time> <string template>`|Formats provided time according to provided template|
|`InTZ <Time> <string timezone name>`|Converts provided time to parsed timezone from the provided name|
For more information, check out the [Go documentation for the time module](https://pkg.go.dev/time@go1.20.7#pkg-constants).
### Metrics
Minitor supports exporting metrics for [Prometheus](https://prometheus.io/). Prometheus is an open source tool for reading and querying metrics from different sources. Combined with another tool, [Grafana](https://grafana.com/), it allows building of charts and dashboards. You could also opt to just use Minitor to log check results, and instead do your alerting with Grafana.
@@ -178,7 +158,7 @@ minitor-go:
check_interval: 1m30s
```
For the time being, legacy configs for the Python version of Minitor should be compatible if you apply the `-py-compat` flag when running Minitor. Eventually, this flag will go away when later breaking changes are introduced.
The `-py-compat` flag has been removed. Any existing Python oriented configuration needs to be migrated to the new templates.
## Future
+3 -53
View File
@@ -5,7 +5,6 @@ import (
"errors"
"fmt"
"os/exec"
"strings"
"text/template"
"time"
@@ -45,70 +44,21 @@ func (alert Alert) IsValid() bool {
// BuildTemplates compiles command templates for the Alert
func (alert *Alert) BuildTemplates() error {
// TODO: Remove legacy template support later after 1.0
legacy := strings.NewReplacer(
"{alert_count}", "{{.AlertCount}}",
"{alert_message}", "{{.MonitorName}} check has failed {{.FailureCount}} times",
"{failure_count}", "{{.FailureCount}}",
"{last_output}", "{{.LastCheckOutput}}",
"{last_success}", "{{.LastSuccess}}",
"{monitor_name}", "{{.MonitorName}}",
)
slog.Debugf("Building template for alert %s", alert.Name)
// Time format func factory
tff := func(formatString string) func(time.Time) string {
return func(t time.Time) string {
return t.Format(formatString)
}
}
// Create some functions for formatting datetimes in popular formats
timeFormatFuncs := template.FuncMap{
"ANSIC": tff(time.ANSIC),
"UnixDate": tff(time.UnixDate),
"RubyDate": tff(time.RubyDate),
"RFC822Z": tff(time.RFC822Z),
"RFC850": tff(time.RFC850),
"RFC1123": tff(time.RFC1123),
"RFC1123Z": tff(time.RFC1123Z),
"RFC3339": tff(time.RFC3339),
"RFC3339Nano": tff(time.RFC3339Nano),
"FormatTime": func(t time.Time, timeFormat string) string {
return t.Format(timeFormat)
},
"InTZ": func(t time.Time, tzName string) (time.Time, error) {
tz, err := time.LoadLocation(tzName)
if err != nil {
return t, fmt.Errorf("failed to convert time to specified tz: %w", err)
}
return t.In(tz), nil
},
}
switch {
case alert.commandTemplate == nil && alert.Command.Command != nil:
alert.commandTemplate = []*template.Template{}
for i, cmdPart := range alert.Command.Command {
if PyCompat {
cmdPart = legacy.Replace(cmdPart)
}
alert.commandTemplate = append(alert.commandTemplate, template.Must(
template.New(alert.Name+fmt.Sprint(i)).Funcs(timeFormatFuncs).Parse(cmdPart),
template.New(alert.Name+fmt.Sprint(i)).Parse(cmdPart),
))
}
case alert.commandShellTemplate == nil && alert.Command.ShellCommand != "":
shellCmd := alert.Command.ShellCommand
if PyCompat {
shellCmd = legacy.Replace(shellCmd)
}
alert.commandShellTemplate = template.Must(
template.New(alert.Name).Funcs(timeFormatFuncs).Parse(shellCmd),
template.New(alert.Name).Parse(shellCmd),
)
default:
return fmt.Errorf("No template provided for alert %s: %w", alert.Name, errNoTemplate)
@@ -168,7 +118,7 @@ func (alert Alert) Send(notice AlertNotice) (outputStr string, err error) {
if err != nil {
err = fmt.Errorf(
"Alert %s failed to send. Returned %w: %w",
"Alert '%s' failed to send. Returned %v: %w",
alert.Name,
err,
ErrAlertFailed,
-13
View File
@@ -70,14 +70,6 @@ func TestAlertSend(t *testing.T) {
"Command shell with bad template",
false,
},
{
Alert{Command: CommandOrShell{ShellCommand: "echo {alert_message}"}},
AlertNotice{MonitorName: "test", FailureCount: 1},
"test check has failed 1 times\n",
false,
"Command shell with legacy template",
true,
},
// Test default log alert down
{
*NewLogAlert(),
@@ -100,8 +92,6 @@ func TestAlertSend(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
// Set PyCompat to value of compat flag
PyCompat = c.pyCompat
err := c.alert.BuildTemplates()
if err != nil {
@@ -121,9 +111,6 @@ func TestAlertSend(t *testing.T) {
log.Printf("Case failed: %s", c.name)
}
// Set PyCompat back to default value
PyCompat = false
log.Println("-----")
}
}
+5 -45
View File
@@ -13,11 +13,11 @@ var errInvalidConfig = errors.New("Invalid configuration")
// Config type contains all provided user configuration
type Config struct {
CheckInterval SecondsOrDuration `yaml:"check_interval"`
DefaultAlertAfter int16 `yaml:"default_alert_after"`
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
DefaultAlertDown []string `yaml:"default_alert_down"`
DefaultAlertUp []string `yaml:"default_alert_up"`
CheckInterval time.Duration `yaml:"check_interval"`
DefaultAlertAfter int16 `yaml:"default_alert_after"`
DefaultAlertEvery *int16 `yaml:"default_alert_every"`
DefaultAlertDown []string `yaml:"default_alert_down"`
DefaultAlertUp []string `yaml:"default_alert_up"`
Monitors []*Monitor
Alerts map[string]*Alert
}
@@ -56,34 +56,6 @@ func (cos *CommandOrShell) UnmarshalYAML(unmarshal func(interface{}) error) erro
return nil
}
// SecondsOrDuration wraps a duration value for parsing a duration or seconds from YAML
// NOTE: This should be removed in favor of only parsing durations once compatibility is broken
type SecondsOrDuration struct {
value time.Duration
}
// Value returns a duration value
func (sod SecondsOrDuration) Value() time.Duration {
return sod.value
}
// UnmarshalYAML allows unmarshalling a duration value or seconds if an int was provided
func (sod *SecondsOrDuration) UnmarshalYAML(unmarshal func(interface{}) error) error {
var seconds int64
err := unmarshal(&seconds)
if err == nil {
sod.value = time.Second * time.Duration(seconds)
return nil
}
// Error indicates that we don't have an int
err = unmarshal(&sod.value)
return err
}
// IsValid checks config validity and returns true if valid
func (config Config) IsValid() (isValid bool) {
isValid = true
@@ -182,18 +154,6 @@ func LoadConfig(filePath string) (config Config, err error) {
slog.Debugf("Config values:\n%v\n", config)
// Add log alert if not present
if PyCompat {
// Initialize alerts list if not present
if config.Alerts == nil {
config.Alerts = map[string]*Alert{}
}
if _, ok := config.Alerts["log"]; !ok {
config.Alerts["log"] = NewLogAlert()
}
}
// Finish initializing configuration
if err = config.Init(); err != nil {
return
+3 -9
View File
@@ -15,7 +15,6 @@ func TestLoadConfig(t *testing.T) {
}{
{"./test/valid-config.yml", false, "Valid config file", false},
{"./test/valid-config-default-values.yml", false, "Valid config file with default values", false},
{"./test/valid-default-log-alert.yml", false, "Valid config file with default log alert PyCompat", true},
{"./test/valid-default-log-alert.yml", true, "Invalid config file no log alert", false},
{"./test/does-not-exist", true, "Invalid config path", false},
{"./test/invalid-config-type.yml", true, "Invalid config type for key", false},
@@ -25,8 +24,6 @@ func TestLoadConfig(t *testing.T) {
for _, c := range cases {
log.Printf("Testing case %s", c.name)
// Set PyCompat based on compatibility mode
PyCompat = c.pyCompat
_, err := LoadConfig(c.configPath)
hasErr := (err != nil)
@@ -34,9 +31,6 @@ func TestLoadConfig(t *testing.T) {
t.Errorf("LoadConfig(%v), expected_error=%v actual=%v", c.name, c.expectErr, err)
log.Printf("Case failed: %s", c.name)
}
// Set PyCompat to default value
PyCompat = false
}
}
@@ -53,15 +47,15 @@ func TestIntervalParsing(t *testing.T) {
oneMinute := time.Minute
// validate top level interval seconds represented as an int
if config.CheckInterval.Value() != oneSecond {
if config.CheckInterval != oneSecond {
t.Errorf("Incorrectly parsed int seconds. expected=%v actual=%v", oneSecond, config.CheckInterval)
}
if config.Monitors[0].CheckInterval.Value() != tenSeconds {
if config.Monitors[0].CheckInterval != tenSeconds {
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
}
if config.Monitors[1].CheckInterval.Value() != oneMinute {
if config.Monitors[1].CheckInterval != oneMinute {
t.Errorf("Incorrectly parsed seconds duration. expected=%v actual=%v", oneSecond, config.CheckInterval)
}
+1 -1
View File
@@ -1,6 +1,6 @@
module git.iamthefij.com/iamthefij/minitor-go
go 1.20
go 1.17
require (
git.iamthefij.com/iamthefij/slog v1.3.0
-72
View File
@@ -1,72 +0,0 @@
package main
import (
"fmt"
"io"
"net/http"
"strings"
)
// HealthCheckHandler is an http.Handler (see ServeHTTP) that reports the
// health of Minitor itself and of the monitors it was created with.
type HealthCheckHandler struct {
// isMinitorHealthy is the flag stored by MinitorHealthy and reported by
// MinitorHealthCheck.
isMinitorHealthy bool
// monitors are the monitors inspected by MonitorsHealthCheck.
monitors []*Monitor
}
// NewHealthCheckHandler builds a HealthCheckHandler for the given monitors.
//
// The returned handler starts with its Minitor health flag set to false, so
// MinitorHealthCheck reports unhealthy until MinitorHealthy(true) is called.
func NewHealthCheckHandler(monitors []*Monitor) *HealthCheckHandler {
	handler := HealthCheckHandler{
		isMinitorHealthy: false,
		monitors:         monitors,
	}

	return &handler
}
// MinitorHealthy records whether Minitor itself is currently healthy. The
// stored value is what MinitorHealthCheck reports.
//
// NOTE(review): the flag is written here and read in MinitorHealthCheck with
// no synchronization visible in this file; confirm whether the writer and the
// HTTP handler can run on different goroutines.
func (hch *HealthCheckHandler) MinitorHealthy(healthy bool) {
hch.isMinitorHealthy = healthy
}
// MinitorHealthCheck reports the health of Minitor itself.
//
// It returns true and "OK" when the handler has been marked healthy through
// MinitorHealthy, and false and "UNHEALTHY" otherwise.
func (hch HealthCheckHandler) MinitorHealthCheck() (bool, string) {
	if !hch.isMinitorHealthy {
		return false, "UNHEALTHY"
	}

	return true, "OK"
}
// MonitorsHealthCheck reports the combined health of the handler's monitors.
//
// It returns true and "OK" when every monitor's IsUp() is true (including when
// there are no monitors). Otherwise it returns false and a message listing the
// names of the monitors that are not up, comma separated, in slice order.
func (hch HealthCheckHandler) MonitorsHealthCheck() (bool, string) {
	// A nil slice is enough here: append, len and strings.Join all accept it.
	var downMonitors []string

	for _, monitor := range hch.monitors {
		if !monitor.IsUp() {
			downMonitors = append(downMonitors, monitor.Name)
		}
	}

	if len(downMonitors) > 0 {
		return false, fmt.Sprintf(
			"UNHEALTHY: The following monitors are unhealthy: %s",
			strings.Join(downMonitors, ", "),
		)
	}

	return true, "OK"
}
// ServeHTTP answers a health check request.
//
// When the request carries a non-empty "monitors" query parameter the result
// of MonitorsHealthCheck is served; otherwise the result of MinitorHealthCheck
// is served. A healthy result is written with status 200 and an unhealthy one
// with status 503; the check's message is the response body.
func (hch HealthCheckHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
	// Pick the check first; only the selected one is ever invoked.
	check := hch.MinitorHealthCheck
	if r.URL.Query().Get("monitors") != "" {
		check = hch.MonitorsHealthCheck
	}

	ok, message := check()

	status := http.StatusServiceUnavailable
	if ok {
		status = http.StatusOK
	}

	w.WriteHeader(status)
	// Best effort: a failed write to the client is deliberately ignored.
	_, _ = io.WriteString(w, message)
}
// HandleHealthCheck registers the global HealthChecks handler on the default
// HTTP mux under /health.
//
// The handler used to be registered under "/metrics", which is the same
// pattern HandleMetrics registers the Prometheus handler on. http.Handle
// panics when a pattern is registered twice, so enabling metrics and
// self-monitoring together crashed at startup. Health checks therefore get
// their own path.
func HandleHealthCheck() {
	http.Handle("/health", HealthChecks)
}
-79
View File
@@ -1,79 +0,0 @@
package main
import (
"testing"
)
// TestNewHealthCheck verifies the initial state of a freshly built handler:
// Minitor itself is reported unhealthy, and the handler observes changes made
// to the monitors it was given.
func TestNewHealthCheck(t *testing.T) {
	monitor := &Monitor{Name: "Test Monitor"}
	handler := NewHealthCheckHandler([]*Monitor{monitor})

	// Fake an alert after construction; the handler holds the same pointer and
	// is expected to see the change.
	monitor.alertCount++

	if ok, _ := handler.MinitorHealthCheck(); ok {
		t.Errorf("Initial hc state should be unhealthy until some successful alert is sent")
	}

	if ok, _ := handler.MonitorsHealthCheck(); ok {
		t.Errorf("Faking an alert on the monitor pointer should make this unhealthy")
	}
}
// TestMinitorHealthCheck verifies that MinitorHealthCheck mirrors the value
// last passed to MinitorHealthy, both for the boolean and the message.
func TestMinitorHealthCheck(t *testing.T) {
	monitors := []*Monitor{
		{Name: "Test Monitor"},
	}
	hc := NewHealthCheckHandler(monitors)

	t.Run("MinitorHealthCheck(healthy)", func(t *testing.T) {
		hc.MinitorHealthy(true)

		healthy, body := hc.MinitorHealthCheck()
		if !healthy {
			t.Errorf("Expected healthy check")
		}

		if body != "OK" {
			t.Errorf("Expected OK response")
		}
	})

	t.Run("MinitorHealthCheck(unhealthy)", func(t *testing.T) {
		hc.MinitorHealthy(false)

		healthy, body := hc.MinitorHealthCheck()
		if healthy {
			// The message previously said "healthy", the opposite of what
			// this branch detects.
			t.Errorf("Expected unhealthy check")
		}

		if body != "UNHEALTHY" {
			t.Errorf("Expected UNHEALTHY response")
		}
	})
}
// TestMonitorsHealthCheck verifies MonitorsHealthCheck for a monitor that is
// up and then, after an alert is faked on it, for the same monitor being down.
// The subtests share state and rely on running in order.
func TestMonitorsHealthCheck(t *testing.T) {
	monitors := []*Monitor{
		{Name: "Test Monitor"},
	}
	hc := NewHealthCheckHandler(monitors)

	t.Run("MonitorsHealthCheck(healthy)", func(t *testing.T) {
		healthy, body := hc.MonitorsHealthCheck()
		if !healthy {
			t.Errorf("Expected healthy check")
		}

		if body != "OK" {
			t.Errorf("Expected OK response")
		}
	})

	t.Run("MonitorsHealthCheck(unhealthy)", func(t *testing.T) {
		// Fake an alert so the monitor is no longer considered up.
		monitors[0].alertCount++

		healthy, body := hc.MonitorsHealthCheck()
		if healthy {
			// The message previously said "healthy", the opposite of what
			// this branch detects.
			t.Errorf("Expected unhealthy check")
		}

		if body != "UNHEALTHY: The following monitors are unhealthy: Test Monitor" {
			t.Errorf("Expected UNHEALTHY response")
		}
	})
}
+5 -50
View File
@@ -4,7 +4,6 @@ import (
"errors"
"flag"
"fmt"
"net/http"
"time"
"git.iamthefij.com/iamthefij/slog"
@@ -17,13 +16,6 @@ var (
MetricsPort = 8080
// Metrics contains all active metrics
Metrics = NewMetrics()
// Self monitor rather than panicking
SelfMonitor = false
// HealthChecks contains health check values
HealthChecks *HealthCheckHandler = nil
// PyCompat enables support for legacy Python templates
PyCompat = false
// version of minitor being run
version = "dev"
@@ -56,13 +48,7 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
output,
)
if SelfMonitor {
Metrics.SetMonitorStatus(fmt.Sprintf("Alert %s", alertName), false)
}
return err
} else {
Metrics.SetMonitorStatus(fmt.Sprintf("Alert %s", alertName), true)
}
// Count alert metrics
@@ -80,8 +66,6 @@ func sendAlerts(config *Config, monitor *Monitor, alertNotice *AlertNotice) erro
func checkMonitors(config *Config) error {
// TODO: Run this in goroutines and capture exceptions
healthy := true
for _, monitor := range config.Monitors {
if monitor.ShouldCheck() {
success, alertNotice := monitor.Check()
@@ -93,42 +77,24 @@ func checkMonitors(config *Config) error {
if alertNotice != nil {
err := sendAlerts(config, monitor, alertNotice)
// If there was an error in sending an alert, mark as unhealthy or bubble up
// If there was an error in sending an alert, exit early and bubble it up
if err != nil {
if SelfMonitor {
healthy = false
} else {
return err
}
return err
}
}
}
}
if HealthChecks != nil {
HealthChecks.MinitorHealthy(healthy)
}
return nil
}
// ServeMetricsAndHealth starts the default http server
func ServeMetricsAndHealth() {
host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil)
}
func main() {
showVersion := flag.Bool("version", false, "Display the version of minitor and exit")
configPath := flag.String("config", "config.yml", "Alternate configuration path (default: config.yml)")
flag.BoolVar(&slog.DebugLevel, "debug", false, "Enables debug logs (default: false)")
flag.BoolVar(&ExportMetrics, "metrics", false, "Enables prometheus metrics exporting (default: false)")
flag.BoolVar(&PyCompat, "py-compat", false, "Enables support for legacy Python Minitor config. Will eventually be removed. (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics and healthchecks should be exported on, if enabled. (default: 8080)")
flag.BoolVar(&SelfMonitor, "self-monitor", false, "Enables self-monitoring. Export metrics rather than panic when alerts fail. (default: false)")
flag.IntVar(&MetricsPort, "metrics-port", MetricsPort, "The port that Prometheus metrics should be exported on, if enabled. (default: 8080)")
flag.Parse()
// Print version if flag is provided
@@ -145,19 +111,8 @@ func main() {
// Serve metrics exporter, if specified
if ExportMetrics {
slog.Infof("Exporting metrics to Prometheus on port %d", MetricsPort)
HandleMetrics()
}
if SelfMonitor {
slog.Infof("Starting healthcheck endpoint on port %d", MetricsPort)
HealthChecks = NewHealthCheckHandler(config.Monitors)
HandleHealthCheck()
}
if ExportMetrics || SelfMonitor {
go ServeMetricsAndHealth()
go ServeMetrics()
}
// Start main loop
@@ -165,6 +120,6 @@ func main() {
err = checkMonitors(&config)
slog.OnErrPanicf(err, "Error checking monitors")
time.Sleep(config.CheckInterval.Value())
time.Sleep(config.CheckInterval)
}
}
+17 -63
View File
@@ -4,10 +4,9 @@ import "testing"
func TestCheckMonitors(t *testing.T) {
cases := []struct {
config Config
expectErr bool
name string
selfMonitor bool
config Config
expectErr bool
name string
}{
{
config: Config{},
@@ -23,9 +22,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: false,
name: "Monitor success, no alerts",
selfMonitor: false,
expectErr: false,
name: "Monitor success, no alerts",
},
{
config: Config{
@@ -37,9 +35,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: false,
name: "Monitor failure, no alerts",
selfMonitor: false,
expectErr: false,
name: "Monitor failure, no alerts",
},
{
config: Config{
@@ -51,9 +48,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: false,
name: "Monitor recovery, no alerts",
selfMonitor: false,
expectErr: false,
name: "Monitor recovery, no alerts",
},
{
config: Config{
@@ -66,9 +62,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: true,
name: "Monitor failure, unknown alerts",
selfMonitor: false,
expectErr: true,
name: "Monitor failure, unknown alerts",
},
{
config: Config{
@@ -81,24 +76,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: true,
name: "Monitor recovery, unknown alerts",
selfMonitor: false,
},
{
config: Config{
Monitors: []*Monitor{
{
Name: "Success",
Command: CommandOrShell{Command: []string{"true"}},
AlertUp: []string{"unknown"},
alertCount: 1,
},
},
},
expectErr: false,
name: "Monitor recovery, unknown alerts, with Health Check",
selfMonitor: true,
expectErr: true,
name: "Monitor recovery, unknown alerts",
},
{
config: Config{
@@ -116,9 +95,8 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: false,
name: "Monitor failure, successful alert",
selfMonitor: false,
expectErr: false,
name: "Monitor failure, successful alert",
},
{
config: Config{
@@ -137,36 +115,12 @@ func TestCheckMonitors(t *testing.T) {
},
},
},
expectErr: true,
name: "Monitor failure, bad alert",
selfMonitor: false,
},
{
config: Config{
Monitors: []*Monitor{
{
Name: "Failure",
Command: CommandOrShell{Command: []string{"false"}},
AlertDown: []string{"bad"},
AlertAfter: 1,
},
},
Alerts: map[string]*Alert{
"bad": {
Name: "bad",
Command: CommandOrShell{Command: []string{"false"}},
},
},
},
expectErr: false,
name: "Monitor failure, bad alert, with Health Check",
selfMonitor: true,
expectErr: true,
name: "Monitor failure, bad alert",
},
}
for _, c := range cases {
SelfMonitor = c.selfMonitor
err := c.config.Init()
if err != nil {
t.Errorf("checkMonitors(%s): unexpected error reading config: %v", c.name, err)
+7 -2
View File
@@ -1,6 +1,7 @@
package main
import (
"fmt"
"net/http"
"github.com/prometheus/client_golang/prometheus"
@@ -106,7 +107,11 @@ func (metrics *MinitorMetrics) CountAlert(monitor string, alert string) {
).Inc()
}
// HandleMetrics add Prometheus metrics handler to default http server
func HandleMetrics() {
// ServeMetrics starts an http server with a Prometheus metrics handler
func ServeMetrics() {
http.Handle("/metrics", promhttp.Handler())
host := fmt.Sprintf(":%d", MetricsPort)
_ = http.ListenAndServe(host, nil)
}
+4 -4
View File
@@ -11,9 +11,9 @@ import (
// Monitor represents a particular periodic check of a command
type Monitor struct { //nolint:maligned
// Config values
AlertAfter int16 `yaml:"alert_after"`
AlertEvery *int16 `yaml:"alert_every"`
CheckInterval SecondsOrDuration `yaml:"check_interval"`
AlertAfter int16 `yaml:"alert_after"`
AlertEvery *int16 `yaml:"alert_every"`
CheckInterval time.Duration `yaml:"check_interval"`
Name string
AlertDown []string `yaml:"alert_down"`
AlertUp []string `yaml:"alert_up"`
@@ -45,7 +45,7 @@ func (monitor Monitor) ShouldCheck() bool {
sinceLastCheck := time.Since(monitor.lastCheck)
return sinceLastCheck >= monitor.CheckInterval.Value()
return sinceLastCheck >= monitor.CheckInterval
}
// Check will run the command configured by the Monitor and return a status
+3 -3
View File
@@ -45,9 +45,9 @@ func TestMonitorShouldCheck(t *testing.T) {
name string
}{
{Monitor{}, true, "Empty"},
{Monitor{lastCheck: timeNow, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: SecondsOrDuration{time.Second * 15}}, true, "-20s"},
{Monitor{lastCheck: timeNow, CheckInterval: time.Second * 15}, false, "Just checked"},
{Monitor{lastCheck: timeTenSecAgo, CheckInterval: time.Second * 15}, false, "-10s"},
{Monitor{lastCheck: timeTwentySecAgo, CheckInterval: time.Second * 15}, true, "-20s"},
}
for _, c := range cases {
+1 -1
View File
@@ -1,5 +1,5 @@
---
check_interval: 1
check_interval: 1s
monitors:
- name: Command