Refactor WireGuard keepalive script: reduce initial sleep time, increase failure threshold, and improve server IP resolution logic
continuous-integration/drone/push Build is passing

This commit is contained in:
gyurix
2026-03-22 17:03:33 +01:00
parent b132ab782e
commit 38a33ac7b5
+7 -8
View File
@@ -5,17 +5,18 @@ IFACE="${INTERFACE:-wg0}"
# Wait for the WireGuard interface to be fully up
echo "WireGuard keepalive monitor: waiting 5s for interface $IFACE to come up..."
sleep 5

# Keepalive period taken from the config; stays empty when the key is absent.
# NOTE(review): awk '{print $3}' assumes the "Key = Value" layout with spaces
# around "=" - confirm against the config files actually deployed.
PERSISTENT_KEEP_ALIVE=$(grep PersistentKeepalive "$CONF" | awk '{print $3}')

# Prefer the Endpoint IP; fall back to the Address-derived gateway
WG_SERVER_IP=$(grep Endpoint "$CONF" | awk '{print $3}' | cut -d: -f1)

# Fall back when no Endpoint is configured or when it does not answer a single
# probe. The decision uses ping's exit status instead of matching its summary
# line, because the summary wording differs between ping implementations.
if [ -z "$WG_SERVER_IP" ] || ! ping -c 1 -W 5 "$WG_SERVER_IP" >/dev/null 2>&1; then
  # Gateway is taken to be host .1 of the first three octets of Address
  WG_SERVER_IP="$(grep Address "$CONF" | awk '{print $3}' | cut -d. -f1-3).1"
fi

# Ping at the keepalive period, defaulting to 25s when none is configured
PING_INTERVAL=${PERSISTENT_KEEP_ALIVE:-25}
# Consecutive failed pings tolerated before the monitor forces a restart
MAX_FAILURES=10
fail_count=0

echo "WireGuard keepalive monitor started (target: $WG_SERVER_IP, interval: ${PING_INTERVAL}s, threshold: $MAX_FAILURES)"
@@ -27,11 +28,9 @@ while true; do
fail_count=$((fail_count + 1)) fail_count=$((fail_count + 1))
echo "WireGuard keepalive ping failed ($fail_count/$MAX_FAILURES) to $WG_SERVER_IP via $IFACE" echo "WireGuard keepalive ping failed ($fail_count/$MAX_FAILURES) to $WG_SERVER_IP via $IFACE"
if [ "$fail_count" -ge "$MAX_FAILURES" ]; then if [ "$fail_count" -ge "$MAX_FAILURES" ]; then
echo "WireGuard connection is stuck after $MAX_FAILURES consecutive failures forcing container restart..." echo "WireGuard connection is stuck after $MAX_FAILURES consecutive failures ... forcing container restart..."
# SIGKILL tini (PID 1) so the container exits with code 137 and Docker restarts it exec kill -9 1
kill -9 1
exit 1
fi fi
fi fi
sleep "$PING_INTERVAL" sleep "$PING_INTERVAL"
done done