Process management is how scripts coordinate work — running tasks in parallel, waiting for completion, handling interruptions gracefully, and preventing duplicate runs. Every long-running automation script needs these patterns.
1
Background jobs — &, wait, jobs
BASH
# ── Run in background ─────────────────────────────────────
sleep 10 &                       # run in background
BG_PID=$!                        # $! = PID of the most recent background job
echo "Started PID ${BG_PID}"

# ── wait — block until background job finishes ────────────
wait "${BG_PID}"                 # wait for specific PID
echo "Exit code: $?"             # wait propagates the job's exit status
wait                             # wait for ALL background jobs

# ── Parallel execution — run multiple tasks at once ───────
pids=()
for server in web-01 web-02 web-03; do
  echo " Deploying to ${server}..."
  ssh "${server}" "./deploy.sh" &
  pids+=("$!")                   # quote $! — consistent defensive quoting
done

# Wait for all and check exit codes
failed=0
for pid in "${pids[@]}"; do
  # BUG FIX: not `(( failed++ ))` — post-increment evaluates to 0 the
  # first time, so the arithmetic command exits non-zero and would
  # abort the loop under `set -e`. Plain assignment is always safe.
  wait "${pid}" || failed=$((failed + 1))
done
(( failed > 0 )) && echo "ERROR: ${failed} deployments failed" >&2

# ── jobs — list background jobs (interactive shells) ──────
jobs                             # show background jobs
jobs -l                          # with PIDs
fg %1                            # bring job 1 to foreground
bg %1                            # resume job 1 in the background
2
Signals and trap — graceful cleanup
BASH
# ── Common signals ────────────────────────────────────────
# SIGINT (2) — Ctrl+C — interrupt
# SIGTERM (15) — kill pid — polite termination
# SIGKILL (9) — kill -9 pid — force kill (cannot be trapped)
# SIGHUP (1) — terminal close / reload config
# EXIT — pseudo-signal, runs when script exits for any reason

# ── trap syntax ───────────────────────────────────────────
# trap 'command' SIGNAL [SIGNAL ...]

# ── Cleanup on any exit ───────────────────────────────────
TMPFILE=$(mktemp)
LOCKFILE=/tmp/myapp.lock

# Remove temp artifacts and stop children. NOTE(review): `kill 0`
# signals the WHOLE process group, including this shell — keep it as
# the last statement so cleanup itself finishes first.
cleanup() {
  echo " Cleaning up..."
  rm -f "${TMPFILE}" "${LOCKFILE}"
  kill 0 2>/dev/null             # kill all child processes in group
}
trap cleanup EXIT                # runs on ANY exit
# BUG FIX: trapping `cleanup` on the signals directly ran it TWICE
# (once for the signal, once more for EXIT) and then *resumed* the
# script after the handler returned. Just exit on the signal — the
# EXIT trap then fires cleanup exactly once. 128+signum is the
# conventional exit code (130 = INT, 143 = TERM).
trap 'exit 130' INT
trap 'exit 143' TERM HUP

# ── Graceful shutdown with message ────────────────────────
# BUG FIX: initialize the flag BEFORE installing the trap — otherwise
# a SIGTERM arriving between the trap and the assignment would have
# its SHUTDOWN=true clobbered back to false.
SHUTDOWN=false
handle_sigterm() {
  echo " SIGTERM received — shutting down gracefully..."
  # finish current work, then exit cleanly
  SHUTDOWN=true
}
trap handle_sigterm TERM
# Main loop checks the flag — "${SHUTDOWN}" expands to the `true` or
# `false` command, whose exit status drives the loop condition.
while ! "${SHUTDOWN}"; do
  process_one_job
done
echo " Shutdown complete"
3
ps, pgrep, pkill — inspect and control processes
BASH
# ── ps — process status ───────────────────────────────────
ps aux                           # all processes, detailed
ps aux | grep nginx              # filter by name (prefer pgrep below)
ps -p 1234                       # specific PID
ps -u vriddh                     # processes of user
# BUG FIX: plain `ps` shows only processes of the current terminal —
# add `aux` so the sort actually covers the whole system.
ps aux --sort=-%cpu | head -10   # top CPU consumers
ps aux --sort=-%mem | head -10   # top memory consumers

# ── pgrep / pkill — find/kill by name ────────────────────
pgrep nginx                      # get PIDs of nginx processes
pgrep -l nginx                   # with process names
pgrep -u vriddh                  # processes of user
pgrep -x nginx                   # exact name match
pkill nginx                      # kill all nginx processes (SIGTERM)
pkill -9 myapp                   # force kill — last resort
pkill -HUP nginx                 # send SIGHUP (reload)
pkill -u vriddh sleep            # kill vriddh's sleep processes

# ── kill — by PID ─────────────────────────────────────────
kill 1234                        # SIGTERM (15) — polite, try first
kill -9 1234                     # SIGKILL (9) — last resort
kill -HUP 1234                   # SIGHUP — reload config
kill -0 1234                     # check if PID exists (no signal sent)
# ── Check if process is running ──────────────────────────
# Succeeds iff a process whose name exactly matches $1 exists.
is_running() {
  # pgrep's exit status becomes this function's return value
  pgrep -x "${1}" >/dev/null 2>&1
}
if ! is_running "nginx"; then
  echo " ✘ nginx is NOT running"
else
  echo " ✔ nginx is running"
fi
4
nohup, disown, and lockfiles
BASH
# ── nohup — survive terminal close ───────────────────────
# nohup makes the command ignore SIGHUP, so it keeps running after
# the terminal that launched it closes.
nohup ./long_process.sh & # stdout/stderr go to ./nohup.out by default
nohup ./long_process.sh > /var/log/process.log 2>&1 & # explicit log file
echo "PID: $!"
# ── disown — detach background job from shell ─────────────
# disown removes a job from the shell's job table, so the shell will
# not forward SIGHUP to it when the shell itself exits.
./long_process.sh &
disown # detach last background job
disown %1 # detach job 1
disown -h %1 # keep in jobs list but ignore SIGHUP
# ── Lockfile — prevent duplicate script runs ──────────────
LOCKFILE="/tmp/$(basename "$0").lock"

# Acquire an exclusive run lock or exit 1 if another live instance
# holds it. A lockfile left behind by a dead PID is treated as stale.
acquire_lock() {
  if [ -f "${LOCKFILE}" ]; then
    local locked_pid
    locked_pid=$(cat "${LOCKFILE}" 2>/dev/null)
    # kill -0 sends no signal — it only tests whether the PID exists
    if kill -0 "${locked_pid}" 2>/dev/null; then
      echo "ERROR: Already running (PID ${locked_pid})" >&2
      exit 1
    fi
    echo " Stale lock found — cleaning up"
  fi
  echo $$ > "${LOCKFILE}"
  # BUG FIX: register the cleanup trap only AFTER we own the lock.
  # Registering it unconditionally before acquire_lock meant the
  # "already running" exit path deleted the OTHER instance's lockfile.
  trap 'rm -f "${LOCKFILE}"' EXIT
}
acquire_lock
# ── flock — kernel-level locking (more reliable) ──────────
# Exclusive lock — only one instance at a time.
# Open FD 9 on the lockfile; the lock is tied to that FD, so the
# kernel releases it automatically when the script exits — no stale
# lockfiles to clean up.
exec 9>/tmp/myapp.lock
flock -n 9 || { echo "Already running" >&2; exit 1; } # -n: fail instead of blocking
# Or one-liner with flock
flock -n /tmp/myapp.lock ./myapp.sh || echo "Already running"
Terminal output
Key
Running
Not running / error
PID info
vriddh@prod-01:~/scripts$ ./deploy_parallel.sh
Deploying to web-01...
Deploying to web-02...
Deploying to web-03...
[Waiting for all to finish...]
✔ web-01 done (PID 18421)
✔ web-02 done (PID 18422)
✘ web-03 failed (PID 18423) exit=1
ERROR: 1 deployment failed
vriddh@prod-01:~/scripts$ ./backup.sh
ERROR: Already running (PID 19234)
█
✔ Process management rules — Always save
$! after forking a background job. Always wait for background jobs and check their exit codes. Use trap cleanup EXIT — it runs even on set -e failures. Use flock over manual lockfiles for production scripts — it's atomic and auto-releases on script exit.