AWK is a capable numeric processing engine, not just a text tool. Its built-in mathematical functions cover integer truncation, random numbers, square roots, trigonometry, exponentials and logarithms. Combined with aggregation arrays, AWK can compute descriptive statistics, bucket continuous values into histograms, and produce formatted numeric reports, usually in a single pass through the data.
1
AWK built-in mathematical functions
AWK
# ── Numeric functions reference ───────────────────────────
awk '
# Absolute value helper: AWK has no built-in abs().
function abs(x) { return (x < 0) ? -x : x }

BEGIN {
    # int() truncates toward zero
    print int(3.9)            # 3
    print int(-3.9)           # -3

    # Square root
    print sqrt(2)             # 1.41421

    # Exponentiation (x^y)
    print 2^10                # 1024
    print 2**10               # 1024 (gawk)

    # Natural logarithm and its inverse
    print log(2.71828)        # ~1.0
    print exp(1)              # 2.71828

    # Trigonometric functions take radians
    pi = atan2(0, -1)         # π = 3.14159...
    print sin(pi/6)           # 0.5
    print cos(0)              # 1

    # Absolute value via the helper defined above
    print abs(-5)             # 5

    # rand() yields 0 <= r < 1; a fixed seed makes the sequence repeatable
    srand(42)
    print rand()
    print int(rand() * 100)   # random integer 0-99
}'
2
Descriptive statistics — mean, min, max, stddev
AWK
# Compute full statistics in a single pass.
# Variance uses the Welford running update instead of sum-of-squares minus
# mean-squared, which loses precision (and can go negative) when the values
# are large relative to their spread. Reports the population std deviation.
awk '
{
    x = $1 + 0                      # force numeric interpretation of the field
    n++
    sum += x

    # Welford update: mean and m2 (sum of squared deviations) stay accurate
    delta = x - mean
    mean += delta / n
    m2 += delta * (x - mean)

    if (n == 1 || x < min) min = x
    if (n == 1 || x > max) max = x
}
END {
    if (n == 0) { print "No data"; exit }
    sd = sqrt(m2 / n)               # population standard deviation
    printf "Count: %d\n", n
    printf "Sum: %.3f\n", sum
    printf "Mean: %.3f\n", sum / n
    printf "Min: %.3f\n", min
    printf "Max: %.3f\n", max
    printf "StdDev: %.3f\n", sd
    printf "Range: %.3f\n", max - min
}' response_times.txt
# ── Percentiles (requires storing all values) ─────────────
# Uses the nearest-rank definition: the P-th percentile is the value at
# rank ceil(n * P / 100) in the sorted data. Plain int(n * P) truncates and
# yields rank 0 (an empty slot) for small inputs, e.g. n = 1.
awk '
# Return the nearest-rank index (1..n) for an integer percentage pct.
# Integer arithmetic avoids floating-point error in the ceiling.
function rank(pct, n,    k) {
    k = int((n * pct + 99) / 100)
    if (k < 1) k = 1
    if (k > n) k = n
    return k
}
{ vals[NR] = $1 + 0 }               # store as numbers so asort orders numerically
END {
    n = asort(vals)
    if (n == 0) { print "No data"; exit }
    printf "P50: %.3f\n", vals[rank(50, n)]
    printf "P90: %.3f\n", vals[rank(90, n)]
    printf "P95: %.3f\n", vals[rank(95, n)]
    printf "P99: %.3f\n", vals[rank(99, n)]
}' response_times.txt # gawk: asort() available
# Portable alternative (pipe to sort)
# The values arrive already sorted, so a percentile is simply the value at
# nearest-rank position ceil(n * P / 100). Truncating with int(n * P) would
# select rank 0 (an empty slot) for small inputs.
awk '{ print $1 }' response_times.txt | sort -n | awk '
# Return the nearest-rank index (1..n) for an integer percentage pct.
function rank(pct, n,    k) {
    k = int((n * pct + 99) / 100)
    if (k < 1) k = 1
    if (k > n) k = n
    return k
}
{ vals[NR] = $1 }
END {
    if (NR == 0) { print "No data"; exit }
    printf "P95: %.3f\n", vals[rank(95, NR)]
    printf "P99: %.3f\n", vals[rank(99, NR)]
}'
3
Histograms and bucket analysis
AWK
# ── Bucket response times into ranges ─────────────────────
# Reads a duration in seconds from column 1 and prints one row per bucket:
# label, count, percentage of all rows, and a text bar (one # per 2%).
# Buckets are kept in a fixed list so they print in ascending order on any
# awk; sorting the label strings would put "1s-2s" and ">2s" before "100-250ms".
awk '
BEGIN {
    nb = split("0-100ms 100-250ms 250-500ms 500ms-1s 1s-2s >2s", label, " ")
}
{
    ms = $1 * 1000                  # convert seconds to ms
    if      (ms < 100)  i = 1
    else if (ms < 250)  i = 2
    else if (ms < 500)  i = 3
    else if (ms < 1000) i = 4
    else if (ms < 2000) i = 5
    else                i = 6
    bucket[i]++
    total++
}
END {
    if (total == 0) { print "No data"; exit }   # avoid division by zero
    for (i = 1; i <= nb; i++) {
        pct = bucket[i] / total * 100
        bar = ""
        for (j = 0; j < pct / 2; j++) bar = bar "#"
        printf "%-10s %6d %5.1f%% %s\n", label[i], bucket[i], pct, bar
    }
}' access.log
# ── Dynamic bucket width ──────────────────────────────────
# Groups column 1 into fixed-width ranges and prints "low-high count",
# ordered numerically by the lower bound.
awk '
BEGIN { bucket_size = 10 }
{
    lower = bucket_size * int($1 / bucket_size)
    upper = lower + bucket_size - 1
    tally[sprintf("%d-%d", lower, upper)]++
}
END {
    for (range in tally)
        printf "%s %d\n", range, tally[range]
}' scores.txt | sort -t- -k1,1n
4
Human-readable number formatting
AWK
# ── Format bytes as human-readable ───────────────────────
# Prints column 1 followed by column 2 scaled to B/KB/MB/GB/TB.
awk '
# human(b): divide b by 1024 until it drops below 1024 or reaches TB,
# then return it with one decimal place and the matching unit suffix.
function human(b,    idx, unit) {
    split("B KB MB GB TB", unit, " ")
    idx = 1
    while (b >= 1024 && idx < 5) {
        b /= 1024
        idx++
    }
    return sprintf("%.1f%s", b, unit[idx])
}
{ print $1, human($2) }' transfer.log
# ── Format duration (seconds → human) ────────────────────
# Prints column 1 followed by column 2 (seconds) as a compact duration.
awk '
# duration(s): split s into days/hours/minutes/seconds and show the three
# (or fewer) most significant units, starting at the largest non-zero one.
function duration(s,    days, hours, mins, text) {
    days = int(s / 86400)
    s -= days * 86400
    hours = int(s / 3600)
    s -= hours * 3600
    mins = int(s / 60)
    s -= mins * 60

    if (days > 0)       text = sprintf("%dd %dh %dm", days, hours, mins)
    else if (hours > 0) text = sprintf("%dh %dm %ds", hours, mins, s)
    else if (mins > 0)  text = sprintf("%dm %ds", mins, s)
    else                text = sprintf("%ds", s)
    return text
}
{ print $1, duration($2) }' uptimes.txt
# ── Format with thousand separators (gawk) ───────────────
# Prints column 1 and column 2 with commas every three digits.
gawk '
# commas(n): return the integer part of n with thousands separators.
# The sign is split off first so a negative number whose digit count is a
# multiple of three does not come out as "-,123,456".
function commas(n,    digits, sign, out) {
    digits = sprintf("%d", n)
    sign = ""
    if (substr(digits, 1, 1) == "-") {
        sign = "-"
        digits = substr(digits, 2)
    }
    out = ""
    # Peel three digits at a time off the right-hand end.
    while (length(digits) > 3) {
        out = "," substr(digits, length(digits) - 2) out
        digits = substr(digits, 1, length(digits) - 3)
    }
    return sign digits out
}
{ printf "%-20s %s requests\n", $1, commas($2) }' stats.txt
# ── Percentage with bar chart ─────────────────────────────
# Sums column 2 per key in column 1, then prints each key with its share of
# the grand total and a 20-column bar (one # per 5%), largest share first.
# Keys are assumed to contain no whitespace, since sort keys on field 2.
awk '{ data[$1] += $2; total += $2 }
END {
    if (total == 0) exit            # nothing to chart; avoids division by zero
    for (k in data) {
        pct = data[k] / total * 100
        bar = sprintf("%-20s", "")  # blank 20-column track
        for (i = 0; i < int(pct / 5); i++) sub(/ /, "#", bar)
        printf "%-15s %5.1f%% |%s|\n", k, pct, bar
    }
}' regions.txt | sort -k2,2nr
vriddh@prod-01:~/scripts$awk '{n++;sum+=$1;if($1>max)max=$1;if(n==1||$1<min)min=$1} END{printf "n=%d mean=%.3f min=%.3f max=%.3f\n",n,sum/n,min,max}' times.txt
n=8421 mean=0.247 min=0.012 max=4.821
vriddh@prod-01:~/scripts$awk '{ms=$1*1000; if(ms<100)b["<100ms"]++;else if(ms<500)b["100-500ms"]++;else b[">500ms"]++; t++} END{for(k in b) printf "%-12s %5d (%.1f%%)\n",k,b[k],b[k]/t*100}' times.txt | sort
<100ms 5841 (69.4%)
100-500ms 2213 (26.3%)
>500ms 367 (4.4%)
█
✔ AWK maths rules — Use
atan2(0,-1) to get π portably. Always guard against division by zero with n > 0 ? sum/n : 0. For percentiles, store all values and use asort() in gawk, or sort externally and pick by position. Define reusable functions like human() and duration() at the top level of the program, outside any BEGIN block or pattern-action rule (AWK does not allow a function to be defined inside a block). Use sprintf() rather than ad-hoc string concatenation to build formatted values.