Field transformation is where AWK pays the biggest dividends in production data pipelines. Converting CSV to TSV, adding derived columns, reordering fields for downstream tools, normalising inconsistent values — these are the tasks AWK handles in a single pass without temp files or multiple tool invocations.
1
Delimiter conversion and field reordering
AWK
# ── Delimiter conversion ──────────────────────────────────
awk 'BEGIN{FS=",";OFS="\t"} {$1=$1; print}' # CSV → TSV
awk 'BEGIN{FS="\t";OFS=","} {$1=$1; print}' # TSV → CSV
awk 'BEGIN{FS="|";OFS=","} {$1=$1; print}' # pipe-delim → CSV
# NOTE: $1=$1 triggers AWK to rebuild $0 using OFS
# Without it, changing OFS does not reformat the line
# ── Reorder fields ────────────────────────────────────────
awk -F',' 'BEGIN{OFS=","} {print $3,$1,$2}' # move col3 first
awk -F',' 'BEGIN{OFS=","} {print $NF,$0}' # prepend last field
# ── Swap two specific fields ──────────────────────────────
awk -F',' 'BEGIN{OFS=","} {
tmp=$2; $2=$4; $4=tmp; print
}'
# ── Remove specific field (field 3) ──────────────────────
# Build the line with a lazy separator: the old
#   printf "%s%s",$i,(i==NF?"\n":OFS)
# form left a trailing comma and NO newline whenever the
# removed field happened to be the last one (NF==3).
awk -F',' 'BEGIN{OFS=","} {
out=""; sep=""
for(i=1;i<=NF;i++) if(i!=3){ out = out sep $i; sep = OFS }
print out
}'
# ── Keep only specific fields ─────────────────────────────
awk -F',' 'BEGIN{OFS=","} {print $1,$3,$5}' # keep cols 1,3,5
2
Adding computed columns
AWK
# ── Add a status column based on value ───────────────────
awk -F',' 'BEGIN{OFS=","; print "name,cpu,status"}
NR>1 {
status = ($2 > 90) ? "CRITICAL" : ($2 > 70 ? "WARN" : "OK")
print $1, $2, status
}' servers.csv
# ── Add a computed percentage column ─────────────────────
# Guard the divisor: a zero (or empty) total makes awk abort
# mid-file with a fatal "division by zero" error otherwise.
awk -F',' 'BEGIN{OFS=","; print "host,used,total,pct"}
NR>1 {
pct = ($3+0 != 0) ? ($2/$3)*100 : 0
printf "%s,%s,%s,%.1f%%\n", $1, $2, $3, pct
}' disk.csv
# ── Add timestamp column ──────────────────────────────────
# NOTE: cmd|getline forks `date` once per input line (fresh
# timestamp each row). If one shared timestamp is enough,
# run the command once in BEGIN instead.
awk -F',' 'BEGIN{OFS=","}
{
cmd = "date +%Y-%m-%dT%H:%M:%S"
cmd | getline ts
close(cmd)
print ts, $0
}' events.csv
# ── Add running total column ──────────────────────────────
awk -F',' 'BEGIN{OFS=","; print "date,amount,cumulative"}
NR>1 {
total += $2
printf "%s,%s,%.2f\n", $1, $2, total
}' transactions.csv
# ── Categorise numeric field ──────────────────────────────
# Pass the header through with the new column name instead of
# silently dropping it (a bare NR>1 discards line 1 entirely
# and emits no header at all).
awk -F',' 'BEGIN{OFS=","}
NR==1 { print $0, "grade"; next }
{
score = $3 + 0
if (score >= 90) grade = "A"
else if (score >= 80) grade = "B"
else if (score >= 70) grade = "C"
else if (score >= 60) grade = "D"
else grade = "F"
print $0, grade
}' students.csv
3
Normalising values and formats
AWK
# ── Normalise hostnames to lowercase ─────────────────────
awk -F',' 'BEGIN{OFS=","} {$1=tolower($1); print}'
# ── Standardise date formats ──────────────────────────────
# Convert DD/MM/YYYY → YYYY-MM-DD
# sprintf zero-pads day/month so 5/3/2024 → 2024-03-05 (the
# plain concatenation produced non-ISO 2024-3-5), and the
# NR==1 branch keeps the header row instead of dropping it.
awk -F',' 'BEGIN{OFS=","}
NR==1 { print; next }
{
n = split($2, d, "/") # split DD/MM/YYYY
if (n == 3) $2 = sprintf("%04d-%02d-%02d", d[3], d[2], d[1])
print
}' events.csv
# ── Convert bytes to MiB in a field (1048576 = 2^20) ─────
awk -F',' 'BEGIN{OFS=","}
NR==1 { print; next }
{ $3 = sprintf("%.1f", $3/1048576); print }' transfers.csv
# ── Strip leading/trailing whitespace from all fields ─────
awk -F',' 'BEGIN{OFS=","}
{
for(i=1;i<=NF;i++) {
gsub(/^[[:space:]]+|[[:space:]]+$/, "", $i)
}
print
}'
# ── Replace empty fields with a default ──────────────────
awk -F',' 'BEGIN{OFS=","}
{
for(i=1;i<=NF;i++) if($i=="") $i="N/A"
print
}'
4
Pivot and transpose
AWK
# ── Transpose rows and columns ────────────────────────────
# cell[r,c] (POSIX SUBSEP indexing) works in every awk; the
# cell[r][c] true-multidimensional form is GNU-awk-only and
# fails in mawk / BSD awk.
awk -F',' 'BEGIN{OFS=","}
{ for(i=1;i<=NF;i++) cell[NR,i]=$i; maxcol=(NF>maxcol?NF:maxcol) }
END {
for(c=1;c<=maxcol;c++) {
for(r=1;r<=NR;r++) printf "%s%s", cell[r,c], (r<NR ? OFS : "\n")
}
}' data.csv
# ── Wide to long format (unpivot) ────────────────────────
# Input: host, cpu, mem, disk
# Output: host, metric, value
awk -F',' 'NR==1 {
for(i=2;i<=NF;i++) headers[i]=$i; next
}
{
host=$1
for(i=2;i<=NF;i++) printf "%s,%s,%s\n", host, headers[i], $i
}' servers.csv
# ── Long to wide format (pivot) ───────────────────────────
# Input: host, metric, value
# Output: one row per host, one column per metric
# NOTE: for (x in arr) iteration order is unspecified — pipe
# the output through sort, or record keys in first-seen order,
# if row/column order matters. +0 turns missing cells into 0.
awk -F',' '{ val[$1,$2]=$3; hosts[$1]=1; metrics[$2]=1 }
END {
for(m in metrics) printf ",%s", m; print ""
for(h in hosts) {
printf "%s", h
for(m in metrics) printf ",%s", val[h,m]+0
print ""
}
}' long_format.csv
vriddh@prod-01:~/scripts$ awk -F',' 'BEGIN{OFS=","; print "name,cpu,status"} NR>1{s=($2>90)?"CRITICAL":($2>70?"WARN":"OK"); print $1,$2,s}' servers.csv
name,cpu,status
prod-web-01,38,OK
prod-db-01,91,CRITICAL
prod-cache-01,23,OK
█
✔ Transformation rules — Always use
$1=$1 (the no-op assignment) to trigger AWK to rebuild $0 with the new OFS — without it, OFS changes are ignored when printing $0. When adding columns, use printf for precise control rather than relying on field assignment. For pivoting, index with val[row,col] (POSIX SUBSEP arrays — the val[row][col] form is GNU-awk-only), collect the distinct row and column keys as you read, and emit the grid in the END block. Always print a header row with BEGIN{print "col1,col2,..."}.