Shell Scripting AWK Mathematics Advanced May 2026

Shell Scripting Advanced AWK: Mathematical Functions & Formatting

Master AWK's built-in math functions — sqrt, log, exp, sin, cos, atan2, int, and rand. Compute statistics (mean, variance, stddev), build histogram buckets, format numbers as human-readable sizes, and generate data summaries.

AWK is a complete numeric processing engine, not just a text tool. Its mathematical functions cover everything from basic rounding and random numbers to trigonometry and logarithms. Combined with aggregation arrays, AWK can compute descriptive statistics, bucket continuous values into histograms, and produce formatted numeric reports entirely in one pass through the data.

AWK
# ── Numeric functions reference ───────────────────────────
# Runs entirely in BEGIN, so no input file is needed. The trailing comment on
# each print shows what awk writes with the default output format (%.6g).
awk 'BEGIN {
  # Integer truncation (toward zero, so this is not floor for negatives)
  print int(3.9)          # 3
  print int(-3.9)         # -3

  # Square root
  print sqrt(2)           # 1.41421

  # Power (x^y)
  print 2^10              # 1024
  print 2**10             # 1024 (gawk spelling; ** is not POSIX, prefer ^)

  # Logarithm / exponent
  print log(2.71828)      # 0.999999 (natural log; 2.71828 is only ≈ e)
  print exp(1)            # 2.71828

  # Trigonometry (radians)
  pi = atan2(0, -1)       # π = 3.14159...
  print sin(pi/6)         # 0.5
  print cos(0)            # 1

  # Absolute value (awk has no built-in abs).
  # The whole ternary is parenthesised so print sees a single expression.
  x = -5
  print ((x < 0) ? -x : x)      # 5

  # Random number 0 <= rand() < 1
  srand(42)               # seed for reproducibility
  print rand()            # reproducible random
  print int(rand() * 100) # random int 0-99
}'
AWK
# Compute full statistics in a single pass
# Reads one numeric sample per line (first field) from response_times.txt and
# prints count, sum, mean, min, max, population standard deviation and range.
# The variance uses a running mean / M2 update (Welford method) instead of
# sum(x^2)/n - mean^2, which loses all precision when the values are large
# compared with their spread.
awk '
NF == 0 { next }   # blank lines are not samples; do not count them as 0
{
  x = $1 + 0
  n++
  sum += x

  # Running update: m2 accumulates the sum of squared deviations.
  delta = x - mean
  mean += delta / n
  m2   += delta * (x - mean)

  if (n == 1 || x < min) min = x
  if (n == 1 || x > max) max = x
}
END {
  if (n == 0) { print "No data"; exit }
  sd = sqrt(m2 / n)   # population (divide by n) standard deviation
  printf "Count:  %d\n",   n
  printf "Sum:    %.3f\n",  sum
  printf "Mean:   %.3f\n",  sum / n
  printf "Min:    %.3f\n",  min
  printf "Max:    %.3f\n",  max
  printf "StdDev: %.3f\n",  sd
  printf "Range:  %.3f\n",  max - min
}' response_times.txt

# ── Percentiles (requires storing all values) ─────────────
# Requires gawk: asort() is a gawk extension, so the command is gawk, not awk.
# Percentiles use the nearest-rank rule: index = ceil(n * pct / 100). The
# previous int(n * p) index was 0 for small inputs (e.g. n = 1), which read an
# empty array slot and printed 0.000.
gawk '
# rank(n, pct): 1-based nearest-rank index, computed with integer arithmetic
# so it is exact; always in 1..n for n >= 1 and 1 <= pct <= 100.
function rank(n, pct) { return int((n * pct + 99) / 100) }

NF { vals[++count] = $1 + 0 }   # skip blank lines; force numeric values

END {
  if (count == 0) { print "No data"; exit }
  n = asort(vals)
  printf "P50:  %.3f\n", vals[rank(n, 50)]
  printf "P90:  %.3f\n", vals[rank(n, 90)]
  printf "P95:  %.3f\n", vals[rank(n, 95)]
  printf "P99:  %.3f\n", vals[rank(n, 99)]
}' response_times.txt

# Portable alternative (pipe to sort)
# Works with any POSIX awk: sort -n orders the values, then the second awk
# picks percentiles by nearest rank, index = ceil(n * pct / 100). The previous
# int(NR * p) index was 0 for very small inputs and printed 0.000.
awk 'NF { print $1 }' response_times.txt | sort -n | awk '
# rank(n, pct): 1-based nearest-rank index using exact integer arithmetic.
function rank(n, pct) { return int((n * pct + 99) / 100) }

{ vals[NR] = $1 }

END {
  n = NR
  if (n == 0) { print "No data"; exit }
  printf "P95: %.3f\n", vals[rank(n, 95)]
  printf "P99: %.3f\n", vals[rank(n, 99)]
}'
AWK
# ── Bucket response times into ranges ─────────────────────
# Prints one row per non-empty bucket: label, count, percentage and a text bar
# (one "#" per 2%). Buckets are printed in ascending latency order from a
# fixed label list; sorting the space-padded labels as strings would put
# ">2s" first. No gawk-only functions are used.
# NOTE(review): the original snippet was cut off inside the bar loop; the bar
# scale and the output row format below are a reconstruction. Confirm.
# NOTE(review): assumes field 1 of access.log is a duration in seconds. Confirm.
awk '
BEGIN {
  nlabels = split("  0-100ms,100-250ms,250-500ms,500ms-1s ,  1s-2s  ,   >2s   ", label, ",")
}
{
  ms = $1 * 1000   # convert seconds to ms
  if      (ms <  100) bucket[label[1]]++
  else if (ms <  250) bucket[label[2]]++
  else if (ms <  500) bucket[label[3]]++
  else if (ms < 1000) bucket[label[4]]++
  else if (ms < 2000) bucket[label[5]]++
  else                bucket[label[6]]++
  total++
}
END {
  for (i = 1; i <= nlabels; i++) {
    b = label[i]
    if (!(b in bucket)) continue   # only buckets that received data
    pct = bucket[b] / total * 100
    bar = ""
    for (j = 0; j < int(pct / 2); j++) bar = bar "#"
    printf "%s %6d %5.1f%%  %s\n", b, bucket[b], pct, bar
  }
}' access.log

# ── Dynamic bucket width ──────────────────────────────────
# Groups the first field of scores.txt into fixed-width ranges and prints
# "<low>-<high> <count>" per range; sort then orders the rows by the low bound.
awk 'BEGIN { bucket_size = 10 }
{
  lower = int($1 / bucket_size) * bucket_size
  upper = lower + bucket_size - 1
  hits[sprintf("%d-%d", lower, upper)]++
}
END {
  # for-in order is unspecified, which is why the output is piped to sort
  for (range in hits)
    printf "%s %d\n", range, hits[range]
}' scores.txt | sort -t- -k1,1n
AWK
# ── Format bytes as human-readable ───────────────────────
# human(b): format a byte count with one decimal and a unit (B, KB, MB, GB, TB;
# 1024-based). s and u are locals (awk declares locals as extra parameters).
# Prints field 1 of transfer.log followed by the formatted field 2.
awk 'function human(b,   s,u) {
  u[1]="B"; u[2]="KB"; u[3]="MB"; u[4]="GB"; u[5]="TB"
  # The threshold is 1023.95 rather than 1024: anything at or above it would
  # be rounded by %.1f to "1024.0", so move on to the next unit instead
  # (1048575 bytes prints as 1.0MB, not 1024.0KB). s < 5 caps the unit at TB.
  for (s=1; b>=1023.95 && s<5; s++) b /= 1024
  return sprintf("%.1f%s", b, u[s])
}
{ print $1, human($2) }' transfer.log

# ── Format duration (seconds → human) ────────────────────
# duration(s): break a number of seconds into days/hours/minutes/seconds and
# return the three (or fewer) most significant units, e.g. "2d 3h 4m",
# "3h 4m 5s", "4m 5s" or "5s". Prints field 1 of uptimes.txt followed by the
# formatted field 2.
awk 'function duration(s,   days,hours,mins,out) {
  # Peel off each unit in turn; s keeps the remainder.
  days  = int(s / 86400); s -= days  * 86400
  hours = int(s / 3600);  s -= hours * 3600
  mins  = int(s / 60);    s -= mins  * 60

  # Pick the format from the largest non-zero unit.
  if (days > 0)
    out = sprintf("%dd %dh %dm", days, hours, mins)
  else if (hours > 0)
    out = sprintf("%dh %dm %ds", hours, mins, s)
  else if (mins > 0)
    out = sprintf("%dm %ds", mins, s)
  else
    out = sprintf("%ds", s)
  return out
}
{ print $1, duration($2) }' uptimes.txt

# ── Format with thousand separators (gawk) ───────────────
# commas(n): return the integer part of n with "," between groups of three
# digits. The sign is split off first so negative values whose digit count is
# a multiple of three do not come out as "-,123". Uses only POSIX awk
# functions. Prints field 1 of stats.txt and the formatted field 2.
gawk 'function commas(n,   s,r,sign) {
  s = sprintf("%d", n)
  sign = ""
  if (substr(s, 1, 1) == "-") { sign = "-"; s = substr(s, 2) }
  r = ""
  # Move the last three digits from s to the front of r until at most three
  # digits remain in s.
  while (length(s) > 3) {
    r = "," substr(s, length(s) - 2) r
    s = substr(s, 1, length(s) - 3)
  }
  return sign s r
}
{ printf "%-20s %s requests\n", $1, commas($2) }' stats.txt

# ── Percentage with bar chart ─────────────────────────────
# Sums field 2 per key (field 1) of regions.txt, then prints each key with its
# share of the grand total and a text bar (one "#" per 5%, so at most 20).
# Rows are sorted by percentage, largest first.
# NOTE(review): the original snippet was cut off inside the bar loop; the bar
# scale and the output row format below are a reconstruction. Confirm.
awk '{ data[$1] += $2; total += $2 }
END {
  # Without this guard an empty file or an all-zero column divides by zero.
  if (total == 0) { print "No data"; exit }
  for (k in data) {
    pct = data[k] / total * 100
    bar = ""
    for (i = 0; i < int(pct / 5); i++) bar = bar "#"
    printf "%-12s %6.1f%% %s\n", k, pct, bar
  }
}' regions.txt | sort -k2,2nr
awk — statistics and histogram
vriddh@prod-01:~/scripts$awk '{n++;sum+=$1;if($1>max)max=$1;if(n==1||$1<min)min=$1} END{printf "n=%d mean=%.3f min=%.3f max=%.3f\n",n,sum/n,min,max}' times.txt
n=8421 mean=0.247 min=0.012 max=4.821
vriddh@prod-01:~/scripts$awk '{ms=$1*1000; if(ms<100)b["<100ms"]++;else if(ms<500)b["100-500ms"]++;else b[">500ms"]++; t++} END{for(k in b) printf "%-12s %5d (%.1f%%)\n",k,b[k],b[k]/t*100}' times.txt | sort
<100ms 5841 (69.4%)
100-500ms 2213 (26.3%)
>500ms 367 (4.4%)
✔ AWK maths rules — Use atan2(0,-1) to get π portably. Always guard against division by zero with n > 0 ? sum/n : 0. For percentiles, store all values and use asort() in gawk, or sort externally and pick by position. Define reusable functions like human() and duration() at the top level of the program — outside any BEGIN, END, or pattern-action block — conventionally before the rules that call them. Use sprintf() not string concatenation to build formatted values.