Log files are the most information-dense artifact a server produces. A well-designed log analysis pipeline extracts traffic patterns, error rates, slow endpoints, top clients, and geographic distributions in seconds — without loading a single GB into Elasticsearch.
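Everything below leans on the fixed field positions of Nginx's default combined log format. A quick orientation sketch, using an invented sample line, shows which awk fields the pipeline reads:
BASH
# One combined-format line (sample values invented for illustration):
#   203.0.113.7 - - [14/Mar/2024:09:12:44 +0000] "GET /api/users?page=2 HTTP/1.1" 200 1532 "-" "curl/8.4.0"
# Split on whitespace, the fields the pipeline uses are:
#   $1 = client IP, $7 = request URL, $9 = status code, $10 = bytes sent
echo '203.0.113.7 - - [14/Mar/2024:09:12:44 +0000] "GET /api/users?page=2 HTTP/1.1" 200 1532 "-" "curl/8.4.0"' | \
awk '{print "ip="$1, "url="$7, "status="$9, "bytes="$10}'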
1. Nginx access log analysis pipeline
BASH
#!/usr/bin/env bash
# log_analysis.sh — Complete Nginx access log report
set -euo pipefail
LOG="${1:-/var/log/nginx/access.log}"
DATE="${2:-$(date -d yesterday +%Y-%m-%d 2>/dev/null || date -v-1d +%Y-%m-%d)}"
# Filter to yesterday's lines only
grep "^.*\[${DATE}" "${LOG}" 2>/dev/null > /tmp/log_slice.txt || \
cp "${LOG}" /tmp/log_slice.txt
TOTAL=$(wc -l < /tmp/log_slice.txt)
[[ "${TOTAL}" -eq 0 ]] && { echo "No log entries for ${DATE}"; exit 0; }
printf "\n %-60s\n" "ACCESS LOG REPORT — ${DATE}"
printf " %-60s\n\n" "$(printf '─%.0s' {1..60})"
# ── Traffic summary ───────────────────────────────────────
printf " TRAFFIC SUMMARY\n"
printf " %-30s %s\n" "Total requests:" "${TOTAL}"
printf " %-30s %s\n" "Unique IPs:" "$(awk '{print $1}' /tmp/log_slice.txt | sort -u | wc -l)"
printf " %-30s %s\n" "Total bandwidth:" \
"$(awk '{s+=$10}END{printf "%.1f MB",s/1048576}' /tmp/log_slice.txt)"
# HTTP status breakdown
printf "\n STATUS CODES\n"
awk '{print $9}' /tmp/log_slice.txt | sort | uniq -c | sort -rn | head -10 | \
awk '{printf " %-10s %6d %s\n",$2,$1,($2>=500?"✘ ERROR":($2>=400?"⚠ CLIENT":"✔"))}'
# ── Top endpoints ─────────────────────────────────────────
printf "\n TOP 10 ENDPOINTS\n"
awk '{print $7}' /tmp/log_slice.txt | \
sed 's/?.*$//' | sort | uniq -c | sort -rn | head -10 | \
awk '{printf " %6d %s\n",$1,$2}'
# ── Top IPs ───────────────────────────────────────────────
printf "\n TOP 10 CLIENTS\n"
awk '{print $1}' /tmp/log_slice.txt | sort | uniq -c | sort -rn | head -10 | \
awk '{printf " %6d %s\n",$1,$2}'
# ── Error analysis ────────────────────────────────────────
printf "\n TOP ERRORS (4xx/5xx)\n"
awk '$9>=400{print $9,$7}' /tmp/log_slice.txt | \
sed 's/?.*$//' | sort | uniq -c | sort -rn | head -10 | \
awk '{printf " %6d %s %s\n",$1,$2,$3}'
# ── Response time percentiles ─────────────────────────────
printf "\n RESPONSE TIME (ms)\n"
# Assumes $request_time is appended as the last field via a custom
# log_format (see the sketch after this script); the default combined
# format has no timing field
awk 'NF>11{print $NF*1000}' /tmp/log_slice.txt | sort -n | \
awk 'BEGIN{n=0} {a[n++]=$1} END{
printf " P50: %.0fms P95: %.0fms P99: %.0fms Max: %.0fms\n",
a[int(n*0.50)], a[int(n*0.95)], a[int(n*0.99)], a[n-1]
}'
# ── Hourly traffic heatmap ─────────────────────────────────
printf "\n HOURLY TRAFFIC\n"
awk '{match($4,/:[0-9]+:/); h=substr($4,RSTART+1,2); count[h]++}
END{
max=0; for(h in count) if(count[h]>max) max=count[h]
for(h=0;h<24;h++) {
key=sprintf("%02d",h)
bar=count[key]+0
filled=int(bar/max*30)
printf " %02d |",h
for(i=0;i<filled;i++) printf "█"
printf " %d\n", bar
}
}' /tmp/log_slice.txt
# Clean up the slice file
rm -f /tmp/log_slice.txt
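The response-time step only works if $request_time is actually logged, and the default combined format omits it. A minimal sketch of a custom format that appends it, using standard Nginx directives (the format name timed is arbitrary; adjust the path for your setup):
NGINX
# Combined format plus $request_time (seconds, ms resolution) as the final field
log_format timed '$remote_addr - $remote_user [$time_local] '
                 '"$request" $status $body_bytes_sent '
                 '"$http_referer" "$http_user_agent" $request_time';
access_log /var/log/nginx/access.log timed;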
2. Application log error trend detection
BASH
#!/usr/bin/env bash
# error_trend.sh — Detect rising error rates in application logs
LOG_DIR="/var/log/myapp"
WINDOW_MINUTES=5
THRESHOLD=50 # errors per window to alert
# Count errors in last N minutes
count_recent_errors() {
local pattern="${1:-ERROR}"
find "${LOG_DIR}" -name "*.log" -newer /tmp/.error_check_last 2>/dev/null \
-exec grep -c "${pattern}" {} + 2>/dev/null | \
awk '{s+=$1}END{print s+0}'
}
touch /tmp/.error_check_last
sleep "${WINDOW_MINUTES}m" &
SLEEP_PID=$!
# Background: tail logs for the window
tail -f "${LOG_DIR}"/app.log 2>/dev/null | \
awk -v limit="${THRESHOLD}" '
/ERROR/{errors++}
errors >= limit {
print "THRESHOLD: "errors" errors in window"
exit
}
' &
TAIL_PID=$!
wait "${SLEEP_PID}"
kill "${TAIL_PID}" 2>/dev/null
# Final count
ERRORS=$(count_recent_errors "ERROR")
CRITS=$(count_recent_errors "CRITICAL")
touch /tmp/.error_check_last
printf " Last %d min: %d ERRORs, %d CRITICALs\n" \
"${WINDOW_MINUTES}" "${ERRORS}" "${CRITS}"
if (( ERRORS > THRESHOLD )); then
echo "ALERT: ${ERRORS} errors exceed threshold ${THRESHOLD}" | \
mail -s "Error spike on $(hostname)" ops@example.com
fi
✔ Log analysis rules — Always filter by date first so you never scan irrelevant historical data. The combined log format's field positions are fixed, so awk '{print $9}' reliably gives the Nginx status code, $7 the request URL, and $1 the client IP. Strip query strings with sed 's/?.*//' before counting endpoints, otherwise every parameter combination counts as a distinct URL. Store slice files in /tmp and clean them up afterwards. Run the full analysis daily from cron and ship the output by email, as in the sketch below.
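A minimal cron entry matching that advice, assuming the /etc/cron.d format with a user field; the script path and recipient address are placeholders:
BASH
# /etc/cron.d/log-analysis — daily report at 06:10 (path and recipient are placeholders)
10 6 * * * root /usr/local/bin/log_analysis.sh | mail -s "Nginx daily report: $(hostname)" ops@example.com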