Data retention policies govern how long data lives in hot storage before being archived or deleted — for compliance (GDPR, HIPAA), for cost (table size affects query performance and backup time), and for legal reasons. Shell scripts that implement retention policies are safer and more auditable than application-level cleanup.
1
Automated data archiving pipeline
BASH
#!/usr/bin/env bash
# archive_old_data.sh — Move aged data from hot to archive table
set -euo pipefail
DB_CONF="/etc/myapp/mysql.conf"
ARCHIVE_DB="myapp_archive"
CUTOFF_DAYS=90
BATCH_SIZE=10000
DRY_RUN="${DRY_RUN:-false}"
db() { mysql --defaults-file="${DB_CONF}" -BNs -D myapp -e "$1"; }
adb() { mysql --defaults-file="${DB_CONF}" -BNs -D "${ARCHIVE_DB}" -e "$1"; }
CUTOFF=$(date -d "${CUTOFF_DAYS} days ago" +%Y-%m-%d 2>/dev/null || date -v-${CUTOFF_DAYS}d +%Y-%m-%d)
echo "=== Data Archiving: cutoff=${CUTOFF} dry_run=${DRY_RUN} ==="
# ── Ensure archive tables exist ────────────────────────────
mysql --defaults-file="${DB_CONF}" -e "
CREATE DATABASE IF NOT EXISTS ${ARCHIVE_DB}"
db "CREATE TABLE IF NOT EXISTS ${ARCHIVE_DB}.orders LIKE myapp.orders"
db "CREATE TABLE IF NOT EXISTS ${ARCHIVE_DB}.order_items LIKE myapp.order_items"
# ── Count rows to archive ──────────────────────────────────
TO_ARCHIVE=$(db "SELECT COUNT(*) FROM orders WHERE created_at < '${CUTOFF}'")
echo " Orders to archive: ${TO_ARCHIVE}"
if "${DRY_RUN}"; then
echo " DRY RUN — no data moved"
exit 0
fi
# ── Archive in batches ────────────────────────────────────
ARCHIVED=0
while true; do
BATCH=$(db "SELECT COUNT(*) FROM orders WHERE created_at < '${CUTOFF}' LIMIT ${BATCH_SIZE}")
(( BATCH == 0 )) && break
# Move order_items first (foreign key dependency)
db "INSERT INTO ${ARCHIVE_DB}.order_items
SELECT oi.* FROM order_items oi
JOIN orders o ON o.id=oi.order_id
WHERE o.created_at < '${CUTOFF}'
LIMIT $((BATCH_SIZE * 10))
ON DUPLICATE KEY UPDATE id=oi.id"
db "DELETE oi FROM order_items oi
JOIN orders o ON o.id=oi.order_id
WHERE o.created_at < '${CUTOFF}'
LIMIT $((BATCH_SIZE * 10))"
# Then move orders
db "INSERT INTO ${ARCHIVE_DB}.orders
SELECT * FROM orders WHERE created_at < '${CUTOFF}'
LIMIT ${BATCH_SIZE} ON DUPLICATE KEY UPDATE id=id"
db "DELETE FROM orders WHERE created_at < '${CUTOFF}' LIMIT ${BATCH_SIZE}"
(( ARCHIVED += BATCH ))
printf " Archived: %d/%d\r" "${ARCHIVED}" "${TO_ARCHIVE}"
done
echo ""
echo " Archiving complete: ${ARCHIVED} orders moved to ${ARCHIVE_DB}"
2
GDPR right-to-erasure implementation
BASH
#!/usr/bin/env bash
# gdpr_erase.sh — GDPR right-to-erasure for a user account
set -euo pipefail
USER_ID="${1:?Usage: gdpr_erase.sh USER_ID}"
DB_CONF="/etc/myapp/mysql.conf"
ERASE_LOG="/var/log/myapp/gdpr-erasure.log"
db() { mysql --defaults-file="${DB_CONF}" -D myapp -e "$1"; }
log() { echo "[$(date --iso-8601=seconds)] ${*}" | tee -a "${ERASE_LOG}"; }
log "GDPR erasure request for user_id=${USER_ID}"
# Verify user exists
USER_INFO=$(mysql --defaults-file="${DB_CONF}" -BNs myapp -e "SELECT id,email FROM users WHERE id=${USER_ID}")
[[ -z "${USER_INFO}" ]] && { log "User not found"; exit 1; }
log "Erasing: ${USER_INFO}"
# Anonymise rather than delete (preserve referential integrity)
db "UPDATE users SET
email='erased-${USER_ID}@deleted.invalid',
name='[Deleted User]',
phone=NULL,
address=NULL,
gdpr_erased_at=NOW()
WHERE id=${USER_ID}"
# Delete sensitive linked data
db "DELETE FROM sessions WHERE user_id=${USER_ID}"
db "DELETE FROM password_resets WHERE user_id=${USER_ID}"
db "DELETE FROM user_tokens WHERE user_id=${USER_ID}"
db "UPDATE orders SET billing_name='[Deleted]', billing_address=NULL WHERE user_id=${USER_ID}"
log "Erasure complete for user_id=${USER_ID}"
✔ ✔ Retenti — Archive in batches with a size limit (never DELETE millions of rows in one statement — it locks the table). Always archive order_items before orders when foreign keys exist. Log every archival operation for audit. For GDPR, prefer anonymisation over deletion to maintain referential integrity — replace PII fields with placeholder values and set a
gdpr_erased_at timestamp. Test archive restore annually.