A server health check dashboard combines everything from this course into one production script — system metrics, database connectivity, service status, disk usage, and a formatted terminal report. This is the kind of script you run at the start of every shift and schedule every 5 minutes in monitoring.
1. Complete server health dashboard script
BASH
#!/usr/bin/env bash
# health_dashboard.sh — Real-time server health overview
set -euo pipefail
# ── Formatting helpers ────────────────────────────────────
# ANSI colour palette. Every escape code lives in its own variable so the
# helpers below share a single definition of each colour.
RED=$'\033[0;31m'
YLW=$'\033[0;33m'
GRN=$'\033[0;32m'
BLU=$'\033[0;34m'
DIM=$'\033[0;90m'
RST=$'\033[0m'
BOLD=$'\033[1m'

# hr — print a dimmed horizontal rule, 60 box-drawing characters wide.
hr() {
  local rule
  # '%.0s' consumes one argument and prints nothing, so the literal '─'
  # is emitted once per element of {1..60}.
  printf -v rule '─%.0s' {1..60}
  printf '%s%s%s\n' "${DIM}" "${rule}" "${RST}"
}

# _status_row COLOUR GLYPH LABEL VALUE — shared renderer for one report row:
# coloured glyph, label left-aligned in 35 columns, then the coloured value.
_status_row() {
  printf ' %s%s%s %-35s %s%s%s\n' "$1" "$2" "${RST}" "$3" "$1" "$4" "${RST}"
}

# ok / warn / fail LABEL VALUE — green, yellow and red report rows.
ok()   { _status_row "${GRN}" '✔' "$1" "$2"; }
warn() { _status_row "${YLW}" '⚠' "$1" "$2"; }
fail() { _status_row "${RED}" '✘' "$1" "$2"; }

# section TITLE — blank line, bold blue title, then a rule.
section() {
  printf '\n'
  printf '%s%s %s%s\n' "${BOLD}" "${BLU}" "$1" "${RST}"
  hr
}
# print_header — emit the dashboard banner: a blank line, the bold title with
# the current timestamp, then the host name and uptime.
# Globals:  BOLD, RST, HOSTNAME (read)
# Outputs:  three lines on stdout
print_header() {
  local now up
  now=$(date '+%Y-%m-%d %H:%M:%S')
  # Try `uptime -p` first; if it fails, fall back to fields 3-4 of plain
  # `uptime`. If both fail, show "unknown" rather than letting `set -e`
  # abort the whole dashboard over a cosmetic field.
  up=$(uptime -p 2>/dev/null || uptime | awk '{print $3,$4}') || up='unknown'
  printf '\n'
  # Dynamic text goes in as '%s' arguments, never in the format string, so a
  # '%' or backslash in the hostname or uptime output cannot corrupt the line.
  printf '%s SERVER HEALTH DASHBOARD — %s%s\n' "${BOLD}" "${now}" "${RST}"
  printf ' Host: %s | Uptime: %s\n' "${HOSTNAME}" "${up}"
}
print_header
# ── System resources (CPU, memory, load) ──────────────────

# cpu_idle_pct — read `top -bn1` output on stdin and print the idle CPU
# percentage from the first "%Cpu" summary line (nothing if there is none).
# top prints a fully idle CPU as "ni,100.0 id," with no space after the comma,
# which shifts every later field; turning commas into spaces and looking for
# the value just before "id" handles both layouts.
cpu_idle_pct() {
  awk '
    /^%Cpu/ && !seen {
      gsub(/,/, " ")
      for (i = 2; i <= NF; i++)
        if ($i == "id") { print $(i - 1); seen = 1; break }
    }'
}

# report_metric LABEL TEXT PCT WARN_AT [FAIL_AT]
# Print exactly one status row for an integer percentage:
#   PCT < WARN_AT               → ok
#   PCT < FAIL_AT (or no limit) → warn
#   otherwise                   → fail
# An explicit if/elif is required here: `a && ok || b && warn || fail` prints
# BOTH the ok and the warn row whenever `a` is true, because && and || have
# equal precedence and associate left to right.
report_metric() {
  local label=$1 text=$2 pct=$3 warn_at=$4 fail_at=${5:-}
  if (( pct < warn_at )); then
    ok "${label}" "${text}"
  elif [[ -z "${fail_at}" ]] || (( pct < fail_at )); then
    warn "${label}" "${text}"
  else
    fail "${label}" "${text}"
  fi
}

section "SYSTEM RESOURCES"

# LC_ALL=C keeps "." as the decimal separator so the commas in top's output
# are only ever field separators.
CPU_IDLE=$(LC_ALL=C top -bn1 | cpu_idle_pct)
if [[ -n "${CPU_IDLE}" ]]; then
  CPU_USED=$(awk -v idle="${CPU_IDLE}" 'BEGIN { printf "%.0f", 100 - idle }')
  report_metric "CPU usage" "${CPU_USED}%" "${CPU_USED}" 70 90
else
  # An unparseable reading is reported as such instead of as "100% used".
  warn "CPU usage" "unavailable"
fi

# One `free` call so total and used come from the same snapshot.
MEM_LINE=$(LC_ALL=C free -m | awk '/^Mem:/ { print $2, $3 }')
MEM_TOTAL=${MEM_LINE% *}
MEM_USED=${MEM_LINE#* }
MEM_PCT=$(awk -v used="${MEM_USED:-0}" -v total="${MEM_TOTAL:-0}" \
  'BEGIN { printf "%.0f", ((total + 0 > 0) ? used * 100 / total : 0) }')
report_metric "Memory usage" "${MEM_USED}/${MEM_TOTAL}MB (${MEM_PCT}%)" "${MEM_PCT}" 75

# Load average: 1-minute load as a percentage of the available cores.
LOAD=$(awk '{print $1}' /proc/loadavg)
CORES=$(nproc)
LOAD_PCT=$(awk -v load="${LOAD}" -v cores="${CORES}" \
  'BEGIN { printf "%.0f", ((cores + 0 > 0) ? load * 100 / cores : 0) }')
report_metric "Load average" "${LOAD} (${CORES} cores, ${LOAD_PCT}%)" "${LOAD_PCT}" 100
# ── Disk ──────────────────────────────────────────────────

# format_disk_rows — read `df -hP` output on stdin and print one status row
# per device-backed filesystem (lines whose first field starts with "/").
#   >= 90% used → red ✘,  >= 75% → yellow ⚠,  otherwise green ✔
# Globals: RED, YLW, GRN, RST (read). The colours are passed in from the
# palette variables so that blanking those variables also disables colour here.
format_disk_rows() {
  awk -v red="${RED:-}" -v ylw="${YLW:-}" -v grn="${GRN:-}" -v rst="${RST:-}" '
    NR > 1 && /^\// {
      pct = $5
      sub(/%/, "", pct)
      # Everything from field 6 onward is the mount point (it may hold spaces).
      mount = $6
      for (i = 7; i <= NF; i++) mount = mount " " $i
      if (pct + 0 >= 90)      { colour = red; glyph = "✘" }
      else if (pct + 0 >= 75) { colour = ylw; glyph = "⚠" }
      else                    { colour = grn; glyph = "✔" }
      printf " %s%s%s %-25s %s used (%s%%)\n", colour, glyph, rst, mount, $3, pct
    }'
}

section "DISK USAGE"
# -P forces one line per filesystem; plain `df -h` may wrap long device names
# onto a second line, which breaks field-based parsing.
df -hP | format_disk_rows
# ── Services ──────────────────────────────────────────────
# For each monitored service, ask systemd first; if systemd does not report it
# active, look for a process with exactly that name; otherwise flag it as down.
section "SERVICES"
for unit in nginx mysql redis-server; do
  # systemd reports the unit as active.
  if systemctl is-active --quiet "${unit}" 2>/dev/null; then
    ok "${unit}" "active"
    continue
  fi

  # Not active under systemd, but a process with this exact name exists.
  if pgrep -x "${unit}" >/dev/null 2>&1; then
    ok "${unit}" "running (no systemd)"
    continue
  fi

  fail "${unit}" "NOT RUNNING"
done
# ── Database ──────────────────────────────────────────────

# check_mysql — report MySQL reachability and the current connection count.
# A single query serves as both the connectivity probe and the metric, so the
# server is contacted once and a failure can never slip between "probe
# succeeded" and "metric query failed" (which would abort under `set -e`).
check_mysql() {
  local conns
  if conns=$(mysql --defaults-file=/etc/myapp/mysql.conf -BNs \
    -e "SELECT COUNT(*) FROM information_schema.PROCESSLIST" 2>/dev/null); then
    ok "MySQL" "${conns} connections"
  else
    fail "MySQL" "UNREACHABLE"
  fi
}

# check_redis — report Redis reachability (PING → PONG) and its key count.
check_redis() {
  local keys
  if redis-cli PING 2>/dev/null | grep -q PONG; then
    # Redis answered PING; if DBSIZE then fails, show "?" rather than letting
    # `set -e` kill the dashboard before the row is printed.
    keys=$(redis-cli DBSIZE 2>/dev/null) || keys='?'
    ok "Redis" "${keys} keys"
  else
    fail "Redis" "UNREACHABLE"
  fi
}

section "DATABASE"
check_mysql
check_redis
# ── Recent errors ─────────────────────────────────────────

# count_recent_errors LOG_DIR MARKER
# Print the total number of lines containing "ERROR" across all *.log files
# under LOG_DIR that were modified after MARKER's mtime. If MARKER does not
# exist yet (first run), files modified in the last 15 minutes are used.
# Always prints an integer and returns 0, including when LOG_DIR is missing
# or nothing matches.
count_recent_errors() {
  local log_dir=$1 marker=$2
  local -a age_filter
  if [[ -e "${marker}" ]]; then
    age_filter=(-newer "${marker}")
  else
    age_filter=(-mmin -15)
  fi
  {
    # grep -c exits 1 when a file has no match, which is the healthy case, and
    # find passes that status on; `|| true` stops pipefail + `set -e` from
    # killing the script exactly when everything is fine.
    # -h drops the "file:" prefix grep adds when given several files, so awk
    # sums the counts rather than file names.
    find "${log_dir}" -type f -name '*.log' "${age_filter[@]}" \
      -exec grep -ch ERROR {} + 2>/dev/null || true
  } | awk '{ total += $1 } END { print total + 0 }'
}

section "RECENT ERRORS (last 15min)"
HEALTH_MARKER=/tmp/.health_check_last
ERROR_COUNT=$(count_recent_errors /var/log/myapp "${HEALTH_MARKER}")
# Advance the marker so the next run only looks at logs changed since now.
touch "${HEALTH_MARKER}"
if (( ERROR_COUNT == 0 )); then
  ok "Application errors" "none"
else
  warn "Application errors" "${ERROR_COUNT} in last 15min"
fi
echo
2. Scheduled monitoring with alerting
BASH
#!/usr/bin/env bash
# monitor.sh — Cron-scheduled version with alerts
set -euo pipefail
# ── Configuration ─────────────────────────────────────────
ALERT_LOG="/var/log/myapp/alerts.log"
# Slack delivery is optional: it is skipped when SLACK_WEBHOOK_URL is unset.
SLACK_URL="${SLACK_WEBHOOK_URL:-}"
# Alert thresholds, in percent.
THRESHOLD_CPU=85
THRESHOLD_MEM=85
THRESHOLD_DISK=85

# json_escape STRING — print STRING escaped for use inside a JSON string
# literal (backslash, double quote, newline, carriage return, tab).
json_escape() {
  local s=$1
  s=${s//\\/\\\\}
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "${s}"
}

# alert LEVEL MESSAGE
# Record an alert in three sinks: the ALERT_LOG file, Slack (only when
# SLACK_URL is set) and syslog via logger. The file and Slack sinks are
# best-effort: a failure is reported on stderr and the remaining sinks still
# run, so an unreachable webhook cannot stop the monitor under `set -e`.
# Globals: ALERT_LOG, SLACK_URL (read)
# Returns: the exit status of logger
alert() {
  local level=$1 msg=$2 payload

  printf '[%s] [%s] %s\n' "$(date --iso-8601=seconds)" "${level}" "${msg}" \
    >> "${ALERT_LOG}" \
    || printf 'health_monitor: cannot write %s\n' "${ALERT_LOG}" >&2

  if [[ -n "${SLACK_URL}" ]]; then
    # Escape every interpolated value so quotes or backslashes in the message
    # cannot produce an invalid JSON body.
    payload=$(printf '{"text":"[%s] %s: %s"}' \
      "$(json_escape "${level}")" \
      "$(json_escape "$(hostname)")" \
      "$(json_escape "${msg}")")
    # --max-time bounds the call so a hung webhook cannot stall the cron job.
    curl -s --max-time 10 -X POST "${SLACK_URL}" \
      -H "Content-Type: application/json" \
      -d "${payload}" &>/dev/null \
      || printf 'health_monitor: Slack notification failed\n' >&2
  fi

  logger -t health_monitor "${level}: ${msg}"
}
# cpu_used_pct — read `top -bn1` output on stdin and print the used CPU
# percentage (100 minus idle, truncated to an integer) from the first "%Cpu"
# summary line; prints nothing if there is no such line.
# top prints a fully idle CPU as "ni,100.0 id," with no space after the comma,
# which makes a fixed field number ($8) land on "id," and report 100% used.
# Turning commas into spaces and locating "id" works for both layouts.
cpu_used_pct() {
  awk '
    /^%Cpu/ && !seen {
      gsub(/,/, " ")
      for (i = 2; i <= NF; i++)
        if ($i == "id") { printf "%d\n", 100 - $(i - 1); seen = 1; break }
    }'
}

# CPU check (LC_ALL=C keeps "." as the decimal separator in top's output)
CPU=$(LC_ALL=C top -bn1 | cpu_used_pct)
CPU=${CPU:-0}
if (( CPU > THRESHOLD_CPU )); then
  alert "WARNING" "CPU ${CPU}% (threshold ${THRESHOLD_CPU}%)"
fi

# Memory check: used / total from the "Mem:" row of free
MEM_PCT=$(LC_ALL=C free | awk '/^Mem:/ { printf "%.0f", (($2 > 0) ? $3 / $2 * 100 : 0) }')
MEM_PCT=${MEM_PCT:-0}
if (( MEM_PCT > THRESHOLD_MEM )); then
  alert "WARNING" "Memory ${MEM_PCT}% (threshold ${THRESHOLD_MEM}%)"
fi

# Disk checks: one alert per device-backed filesystem above the threshold.
# -P keeps each filesystem on a single line. The percentage is printed first
# so a mount point containing spaces is read whole into ${mount}.
df -P | awk -v thresh="${THRESHOLD_DISK}" '
  NR > 1 && /^\// {
    pct = $5
    sub(/%/, "", pct)
    mount = $6
    for (i = 7; i <= NF; i++) mount = mount " " $i
    if (pct + 0 > thresh + 0) print pct, mount
  }' | while read -r pct mount; do
  alert "WARNING" "Disk ${mount} at ${pct}% (threshold ${THRESHOLD_DISK}%)"
done

# Service checks
for svc in nginx mysql; do
  systemctl is-active --quiet "${svc}" || alert "CRITICAL" "${svc} is NOT running"
done

# Dead-man heartbeat: record the time of this completed run. Nothing in this
# script reads the file; a separate watcher is expected to treat a heartbeat
# older than about 10 minutes as a sign that cron has stopped running it.
date > /tmp/.monitor_heartbeat
vriddh@prod-01:~/scripts$ ./health_dashboard.sh
SERVER HEALTH DASHBOARD — 2026-05-01 14:30:00
Host: prod-web-01 | Uptime: up 47 days
SYSTEM RESOURCES
✔ CPU usage 42%
✔ Memory usage 3821/8192MB (47%)
✔ Load average 1.82 (8 cores, 23%)
DISK USAGE
✔ / 14GB used (28%)
⚠ /data 342GB used (78%)
SERVICES
✔ nginx active
✔ mysql active
✘ redis-server NOT RUNNING
█
✔ Health dashboard rules — Always output to stdout so it renders in terminals and can be piped to files. Use ANSI colour codes from variables so they can be disabled by setting them to empty strings (RED=''). Write a heartbeat file on every successful run so you can detect if the monitoring script itself has stopped. Schedule with */5 * * * * /opt/scripts/monitor.sh 2>&1 | logger -t monitor for cron + syslog integration.