# PII Masking Cookbook

Version: 0.40.0 | Updated: 2026-03-23 | Applies to: ranvier-core 0.40+ | Category: Cookbook


# Overview

Detecting and masking Personally Identifiable Information (PII) is a regulatory requirement (GDPR, CCPA, PIPA) for any SaaS handling user data. Ranvier's Guard pattern fits this use case naturally: a Guard scans input for PII patterns and then blocks the request, masks the matches, or logs a warning — before the data reaches business logic.

This cookbook covers PII regex patterns for multiple locales, masking strategies, Guard chains, policy configuration via Bus, and audit integration.


# 1. PII Pattern Definitions

Define regex patterns for common PII types. Each pattern is tagged with the locale it applies to; locale-independent patterns use `GLOBAL`.

use regex::Regex;

/// A named regular expression that detects one kind of PII.
#[derive(Debug, Clone)]
pub struct PiiPattern {
    /// Identifier reported with each detection (for example `"US_SSN"`).
    pub name: &'static str,
    /// Compiled expression that is searched for in scanned text.
    pub regex: Regex,
    /// Locale tag for this pattern (for example `"KR"`, `"US"` or `"GLOBAL"`).
    pub locale: &'static str,
}

/// Returns the built-in PII detection patterns.
///
/// The expressions are compiled once, on first use, and cached for the rest of
/// the process; every call hands back a clone of the cached list so callers do
/// not pay for regex compilation on each request.
///
/// # Panics
///
/// Panics on first use if one of the built-in expressions fails to compile.
/// All of them are literals, so that would be a programming error.
pub fn pii_patterns() -> Vec<PiiPattern> {
    static PATTERNS: std::sync::OnceLock<Vec<PiiPattern>> = std::sync::OnceLock::new();

    // Builds one entry; kept local because it is only meaningful for the table below.
    fn pattern(name: &'static str, locale: &'static str, source: &str) -> PiiPattern {
        PiiPattern {
            name,
            regex: Regex::new(source).expect("built-in PII pattern must be a valid regex"),
            locale,
        }
    }

    PATTERNS
        .get_or_init(|| {
            vec![
                // Korea: Resident Registration Number (주민등록번호)
                pattern("KR_RRN", "KR", r"\d{6}-[1-4]\d{6}"),
                // Korea: Phone number
                pattern("KR_PHONE", "KR", r"01[016789]-?\d{3,4}-?\d{4}"),
                // US: Social Security Number
                pattern("US_SSN", "US", r"\d{3}-\d{2}-\d{4}"),
                // Common: Email
                pattern(
                    "EMAIL",
                    "GLOBAL",
                    r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}",
                ),
                // Common: Credit card. Matches four groups of four digits; the Luhn
                // checksum is NOT verified here, so callers that need it must check it.
                pattern("CREDIT_CARD", "GLOBAL", r"\b(?:\d{4}[- ]?){3}\d{4}\b"),
                // Common: IP Address (v4). Octets are not range-checked (999.1.1.1 matches).
                pattern("IPV4", "GLOBAL", r"\b\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}\b"),
            ]
        })
        .clone()
}

# 2. Masking Strategies

Different contexts require different masking approaches.

/// How a detected PII value is rewritten by `apply_mask`.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum MaskingStrategy {
    /// Replace the whole value with the literal `[REDACTED]`.
    Redact,
    /// Keep the first two and last two characters and star out the rest,
    /// e.g. `123-45-6789` becomes `12*******89`. Very short values become `****`.
    Partial,
    /// Replace with `sha256:` followed by the hex of the first 8 bytes of the
    /// value's SHA-256 digest.
    Hash,
    /// Replace with a consistent pseudonym: `user_` followed by the hex of the
    /// first 4 bytes of the value's SHA-256 digest.
    Pseudonymize,
}

/// Rewrites `value` according to `strategy` and returns the masked text.
///
/// * `Redact` — always returns `[REDACTED]`.
/// * `Partial` — keeps the first two and last two characters and replaces
///   every character in between with `*`; values of four characters or fewer
///   become `****` so that nothing of a short value is revealed.
/// * `Hash` — `sha256:` plus the hex of the first 8 digest bytes.
/// * `Pseudonymize` — `user_` plus the hex of the first 4 digest bytes.
///
/// `Partial` counts characters rather than bytes, so values containing
/// multi-byte UTF-8 (for example non-ASCII digits, which `\d` can match) are
/// masked without slicing through the middle of a character.
pub fn apply_mask(value: &str, strategy: MaskingStrategy) -> String {
    match strategy {
        MaskingStrategy::Redact => "[REDACTED]".to_string(),
        MaskingStrategy::Partial => {
            let char_count = value.chars().count();
            if char_count <= 4 {
                "****".to_string()
            } else {
                // Index by character position, never by byte offset: a byte
                // slice such as `&value[..2]` panics on a non-boundary.
                let keep_tail_from = char_count - 2;
                value
                    .chars()
                    .enumerate()
                    .map(|(index, ch)| {
                        if index < 2 || index >= keep_tail_from {
                            ch
                        } else {
                            '*'
                        }
                    })
                    .collect()
            }
        }
        MaskingStrategy::Hash => {
            use sha2::{Sha256, Digest};
            let hash = Sha256::digest(value.as_bytes());
            // NOTE(review): the digest is unsalted; low-entropy inputs such as SSNs
            // may be recoverable by enumeration — confirm this is acceptable.
            format!("sha256:{}", hex::encode(&hash[..8]))
        }
        MaskingStrategy::Pseudonymize => {
            use sha2::{Sha256, Digest};
            let hash = Sha256::digest(value.as_bytes());
            // Same input always yields the same pseudonym because it is derived
            // from the digest alone.
            format!("user_{}", hex::encode(&hash[..4]))
        }
    }
}

# 3. PII Scanning Guard

A Guard that scans string input for PII patterns and applies the configured policy.

use async_trait::async_trait;
use ranvier_core::prelude::*;

/// Bus type: PII detection policy.
///
/// The guards read this from the Bus and fall back to `PiiPolicy::default()`
/// when no policy has been inserted.
#[derive(Debug, Clone)]
pub struct PiiPolicy {
    /// What to do when PII is found: block, mask, or warn.
    pub action: PiiAction,
    /// How detected values are rewritten when masked text is produced.
    pub strategy: MaskingStrategy,
    /// Locale tags whose patterns should be applied; compared against
    /// `PiiPattern::locale` by `PiiScanGuard`.
    pub locales: Vec<String>,  // empty = all locales
}

/// Action a guard takes once at least one PII match has been found.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum PiiAction {
    /// Fail the transition with a fault describing what was detected.
    Block,  // Reject the request entirely
    /// Forward the payload with detected values replaced.
    Mask,   // Replace PII and continue
    /// Emit a warning log and forward the payload.
    Warn,   // Log warning but pass through
}

impl Default for PiiPolicy {
    fn default() -> Self {
        Self {
            action: PiiAction::Mask,
            strategy: MaskingStrategy::Redact,
            locales: vec![],
        }
    }
}

/// Outcome of one guard scan; the guards insert it into the Bus so later
/// steps (for example an audit step) can read it.
#[derive(Debug, Clone)]
pub struct PiiScanResult {
    /// Every match found during the scan; empty when the payload was clean.
    pub detections: Vec<PiiDetection>,
    /// The scanned payload with detections masked. For JSON payloads this is
    /// the serialized document.
    pub masked_text: String,
}

/// A single PII match.
#[derive(Debug, Clone)]
pub struct PiiDetection {
    /// `PiiPattern::name` of the pattern that matched.
    pub pattern_name: String,
    /// The matched text exactly as it appeared in the scanned string.
    // NOTE(review): this is the unmasked value — confirm that consumers of
    // `PiiScanResult` never log or persist it.
    pub original: String,
    /// The result of applying the masking strategy to `original`.
    pub masked: String,
    /// Byte offset of the match start within the scanned string.
    pub position: usize,
}

/// Guard that scans a `String` payload for PII and applies the `PiiPolicy`
/// found on the Bus (or the default policy when none is present).
pub struct PiiScanGuard;

#[async_trait]
impl Transition<String, String> for PiiScanGuard {
    type Error = String;
    type Resources = ();

    /// Scans `input` with every pattern enabled by the Bus policy.
    ///
    /// A `PiiScanResult` is always inserted into the Bus. When nothing is
    /// detected the input is forwarded unchanged; otherwise the policy action
    /// decides: `Block` faults, `Mask` forwards the masked text, and `Warn`
    /// logs and forwards the original input.
    ///
    /// Masking is span-based: all match ranges are collected against the
    /// original input, overlapping ranges (two patterns hitting the same
    /// text) are merged, and the output is rebuilt in a single pass. This
    /// avoids leaving fragments of a value behind when an earlier replacement
    /// would otherwise destroy the text a later, overlapping match refers to.
    async fn run(
        &self,
        input: String,
        _resources: &Self::Resources,
        bus: &mut Bus,
    ) -> Outcome<String, Self::Error> {
        let policy = bus.get_cloned::<PiiPolicy>()
            .unwrap_or_default();

        let patterns = pii_patterns();
        // An empty locale list means "no restriction".
        let active_patterns = patterns.iter().filter(|p| {
            policy.locales.is_empty()
                || policy.locales.iter().any(|locale| locale.as_str() == p.locale)
        });

        let mut detections = Vec::new();
        // Byte ranges (start, end) of every match, relative to `input`.
        let mut spans: Vec<(usize, usize)> = Vec::new();

        for pattern in active_patterns {
            for mat in pattern.regex.find_iter(&input) {
                detections.push(PiiDetection {
                    pattern_name: pattern.name.to_string(),
                    original: mat.as_str().to_string(),
                    masked: apply_mask(mat.as_str(), policy.strategy),
                    position: mat.start(),
                });
                spans.push((mat.start(), mat.end()));
            }
        }

        // Rebuild the text left to right, masking each merged range once.
        spans.sort_unstable();
        let mut masked = String::with_capacity(input.len());
        let mut cursor = 0;
        let mut pending = spans.into_iter().peekable();
        while let Some((start, mut end)) = pending.next() {
            // Absorb every following range that overlaps the current one.
            while let Some(&(next_start, next_end)) = pending.peek() {
                if next_start >= end {
                    break;
                }
                end = end.max(next_end);
                pending.next();
            }
            masked.push_str(&input[cursor..start]);
            masked.push_str(&apply_mask(&input[start..end], policy.strategy));
            cursor = end;
        }
        masked.push_str(&input[cursor..]);

        // Store scan result in Bus for audit
        bus.insert(PiiScanResult {
            detections: detections.clone(),
            masked_text: masked.clone(),
        });

        if detections.is_empty() {
            return Outcome::next(input);
        }

        match policy.action {
            PiiAction::Block => {
                // Only pattern names go into the error; the matched values do not.
                let names: Vec<_> = detections.iter()
                    .map(|d| d.pattern_name.as_str())
                    .collect();
                Outcome::fault(format!(
                    "400 PII detected: {} pattern(s) found: {:?}",
                    detections.len(), names
                ))
            }
            PiiAction::Mask => {
                tracing::info!(
                    count = detections.len(),
                    "PII detected and masked"
                );
                Outcome::next(masked)
            }
            PiiAction::Warn => {
                tracing::warn!(
                    count = detections.len(),
                    "PII detected (warn mode — passing through)"
                );
                Outcome::next(input)
            }
        }
    }
}

# 4. JSON Body PII Scanning

For structured payloads, recursively scan all string fields.

/// Recursively scans every string inside `value` and masks PII in place.
///
/// Strings are scanned with each entry of `patterns`; objects and arrays are
/// walked recursively (object keys themselves are not scanned). Numbers,
/// booleans and nulls are left untouched.
///
/// Returns one `PiiDetection` per match. `position` is the byte offset of the
/// match within the string as it was before masking.
///
/// All patterns are matched against the original string and the masked string
/// is rebuilt once from the merged match ranges, so one pattern's replacement
/// text is never re-scanned by a later pattern and overlapping matches cannot
/// leave part of a value behind.
pub fn scan_json_value(
    value: &mut serde_json::Value,
    patterns: &[PiiPattern],
    strategy: MaskingStrategy,
) -> Vec<PiiDetection> {
    let mut detections = Vec::new();

    match value {
        serde_json::Value::String(s) => {
            // Byte ranges (start, end) of every match, relative to the original string.
            let mut spans: Vec<(usize, usize)> = Vec::new();

            for pattern in patterns {
                for mat in pattern.regex.find_iter(s.as_str()) {
                    detections.push(PiiDetection {
                        pattern_name: pattern.name.to_string(),
                        original: mat.as_str().to_string(),
                        masked: apply_mask(mat.as_str(), strategy),
                        position: mat.start(),
                    });
                    spans.push((mat.start(), mat.end()));
                }
            }

            if !spans.is_empty() {
                spans.sort_unstable();
                let mut masked = String::with_capacity(s.len());
                let mut cursor = 0;
                let mut pending = spans.into_iter().peekable();
                while let Some((start, mut end)) = pending.next() {
                    // Absorb every following range that overlaps the current one.
                    while let Some(&(next_start, next_end)) = pending.peek() {
                        if next_start >= end {
                            break;
                        }
                        end = end.max(next_end);
                        pending.next();
                    }
                    masked.push_str(&s[cursor..start]);
                    masked.push_str(&apply_mask(&s[start..end], strategy));
                    cursor = end;
                }
                masked.push_str(&s[cursor..]);
                *s = masked;
            }
        }
        serde_json::Value::Object(map) => {
            for (_key, val) in map.iter_mut() {
                detections.extend(scan_json_value(val, patterns, strategy));
            }
        }
        serde_json::Value::Array(arr) => {
            for val in arr.iter_mut() {
                detections.extend(scan_json_value(val, patterns, strategy));
            }
        }
        _ => {}
    }

    detections
}

/// Guard for JSON payloads: scans every string in a `serde_json::Value` for
/// PII using the `PiiPolicy` found on the Bus (or the default policy).
pub struct JsonPiiScanGuard;

#[async_trait]
impl Transition<serde_json::Value, serde_json::Value> for JsonPiiScanGuard {
    type Error = String;
    type Resources = ();

    async fn run(
        &self,
        mut input: serde_json::Value,
        _resources: &Self::Resources,
        bus: &mut Bus,
    ) -> Outcome<serde_json::Value, Self::Error> {
        let policy = bus.get_cloned::<PiiPolicy>().unwrap_or_default();
        let patterns = pii_patterns();
        let detections = scan_json_value(&mut input, &patterns, policy.strategy);

        bus.insert(PiiScanResult {
            detections: detections.clone(),
            masked_text: input.to_string(),
        });

        if detections.is_empty() || policy.action != PiiAction::Block {
            Outcome::next(input)
        } else {
            Outcome::fault(format!(
                "400 PII detected in JSON: {} fields contain PII",
                detections.len()
            ))
        }
    }
}

# 5. Double-Scan Guard Chain

Scan both input and output to catch PII that may be generated by business logic.

use ranvier_runtime::Axon;

// String -> String pipeline that runs the PII guard both before and after the
// business step.
// NOTE(review): each guard run inserts a `PiiScanResult` into the Bus; presumably
// the second insert replaces the first — confirm if the input scan result is
// needed downstream.
let pii_protected_pipeline = Axon::typed::<String, String>("pii-protected")
    // Phase 1: Scan input
    .then(PiiScanGuard)
    // Phase 2: Business logic (receives masked input when the policy action is Mask;
    // in warn mode the guard forwards the original text)
    .then_fn("process", |input, _res, _bus| async move {
        // Your business logic here
        let result = format!("Processed: {input}");
        Outcome::next(result)
    })
    // Phase 3: Scan output (catches PII from DB lookups, LLM responses, etc.)
    .then(PiiScanGuard);

# 6. Policy Configuration via Bus

Different routes can have different PII policies.

use ranvier_http::prelude::*;
use ranvier_guard::prelude::*;

// HTTP server with a default PII policy and two routes that override it.
// Each route's first step inserts its own `PiiPolicy` before `PiiScanGuard` runs.
// NOTE(review): this relies on `bus.insert` replacing the policy inserted by
// `bus_injector` — confirm against the Bus documentation.
Ranvier::http()
    .bus_injector(|_parts, bus| {
        // Default: mask PII
        bus.insert(PiiPolicy::default());
    })
    // Strict route: block any PII
    .post_typed("/api/public/submit", {
        Axon::typed::<String, String>("strict-pii")
            // Override the default policy for this route only.
            .then_fn("set-strict", |input, _res, bus| async move {
                bus.insert(PiiPolicy {
                    action: PiiAction::Block,
                    strategy: MaskingStrategy::Redact,
                    locales: vec![],
                });
                Outcome::next(input)
            })
            .then(PiiScanGuard)
            .then_fn("process", |input, _res, _bus| async move {
                Outcome::next(format!("OK: {input}"))
            })
    })
    // Internal route: warn only
    .post_typed("/api/internal/process", {
        Axon::typed::<String, String>("warn-pii")
            // In warn mode `PiiScanGuard` forwards the original text; the Partial
            // strategy only affects the masked text stored in `PiiScanResult`.
            .then_fn("set-warn", |input, _res, bus| async move {
                bus.insert(PiiPolicy {
                    action: PiiAction::Warn,
                    strategy: MaskingStrategy::Partial,
                    locales: vec![],
                });
                Outcome::next(input)
            })
            .then(PiiScanGuard)
            .then_fn("process", |input, _res, _bus| async move {
                Outcome::next(format!("OK: {input}"))
            })
    })
    .run(())
    .await?;

# 7. Audit Integration

Record PII detection events in the audit hash chain for compliance reporting.

use ranvier_audit::prelude::*;

// Pipeline that records an audit entry whenever the preceding guard found PII.
// NOTE(review): when the policy action is Block, `PiiScanGuard` returns a fault;
// presumably the pipeline stops there and the "audit-pii" step never runs, so
// blocked requests would go unaudited — confirm.
let audited_pii_pipeline = Axon::typed::<String, String>("audited-pii")
    .then(PiiScanGuard)
    .then_fn("audit-pii", |input, _res, bus| async move {
        // Scan result left on the Bus by `PiiScanGuard`.
        let scan_result = bus.get_cloned::<PiiScanResult>();

        if let Ok(result) = scan_result {
            if !result.detections.is_empty() {
                // Auditing is skipped silently when no `AuditTrail` is on the Bus.
                if let Ok(audit) = bus.get_cloned::<AuditTrail>() {
                    // Only pattern names are recorded, never the matched values.
                    let patterns: Vec<_> = result.detections.iter()
                        .map(|d| d.pattern_name.as_str())
                        .collect();

                    audit.record(AuditEntry {
                        action: "pii.detected".into(),
                        // Falls back to "system" when no tenant context is available.
                        actor: bus.get_cloned::<super::TenantContext>()
                            .map(|t| t.tenant_id)
                            .unwrap_or_else(|_| "system".into()),
                        resource: format!(
                            "patterns={:?}, count={}",
                            patterns, result.detections.len()
                        ),
                        timestamp: chrono::Utc::now(),
                    }).await;
                }
            }
        }

        // The payload itself is forwarded unchanged by this step.
        Outcome::next(input)
    })
    .then_fn("process", |input, _res, _bus| async move {
        Outcome::next(format!("Processed: {input}"))
    });

# 8. Testing PII Detection

#[cfg(test)]
mod tests {
    use super::*;

    /// The RRN pattern needs the full `YYMMDD-NNNNNNN` shape.
    #[test]
    fn test_korean_rrn_detection() {
        let patterns = pii_patterns();
        let rrn_pattern = patterns.iter().find(|p| p.name == "KR_RRN").unwrap();
        assert!(rrn_pattern.regex.is_match("950101-1234567"));
        assert!(!rrn_pattern.regex.is_match("950101"));
    }

    /// The SSN pattern needs the dashed `NNN-NN-NNNN` shape.
    #[test]
    fn test_us_ssn_detection() {
        let patterns = pii_patterns();
        let ssn_pattern = patterns.iter().find(|p| p.name == "US_SSN").unwrap();
        assert!(ssn_pattern.regex.is_match("123-45-6789"));
        assert!(!ssn_pattern.regex.is_match("12345"));
    }

    #[test]
    fn test_email_detection() {
        let patterns = pii_patterns();
        let email_pattern = patterns.iter().find(|p| p.name == "EMAIL").unwrap();
        assert!(email_pattern.regex.is_match("user@example.com"));
        assert!(!email_pattern.regex.is_match("not-an-email"));
    }

    #[test]
    fn test_masking_strategies() {
        assert_eq!(
            apply_mask("123-45-6789", MaskingStrategy::Redact),
            "[REDACTED]"
        );
        assert_eq!(
            apply_mask("123-45-6789", MaskingStrategy::Partial),
            "12*******89"
        );
        // Hash and pseudonymize produce consistent outputs
        let hash1 = apply_mask("test@email.com", MaskingStrategy::Hash);
        let hash2 = apply_mask("test@email.com", MaskingStrategy::Hash);
        assert_eq!(hash1, hash2);
        assert!(hash1.starts_with("sha256:"));

        let pseudonym1 = apply_mask("test@email.com", MaskingStrategy::Pseudonymize);
        let pseudonym2 = apply_mask("test@email.com", MaskingStrategy::Pseudonymize);
        assert_eq!(pseudonym1, pseudonym2);
        assert!(pseudonym1.starts_with("user_"));
    }

    /// Block mode must turn any detection into a fault.
    #[tokio::test]
    async fn test_pii_guard_block_mode() {
        let mut bus = Bus::new();
        bus.insert(PiiPolicy {
            action: PiiAction::Block,
            strategy: MaskingStrategy::Redact,
            locales: vec![],
        });

        let guard = PiiScanGuard;
        let result = guard.run(
            "My SSN is 123-45-6789".to_string(),
            &(),
            &mut bus,
        ).await;

        assert!(matches!(result, Outcome::Fault(_)));
    }

    /// Mask mode must forward text with the detected value replaced.
    #[tokio::test]
    async fn test_pii_guard_mask_mode() {
        let mut bus = Bus::new();
        bus.insert(PiiPolicy {
            action: PiiAction::Mask,
            strategy: MaskingStrategy::Redact,
            locales: vec![],
        });

        let guard = PiiScanGuard;
        let result = guard.run(
            "My SSN is 123-45-6789".to_string(),
            &(),
            &mut bus,
        ).await;

        match result {
            Outcome::Next(masked) => {
                assert!(masked.contains("[REDACTED]"));
                assert!(!masked.contains("123-45-6789"));
            }
            _ => panic!("should succeed with masked value"),
        }
    }

    /// Input without PII must come out byte-for-byte unchanged.
    #[tokio::test]
    async fn test_clean_input_passes_through() {
        let mut bus = Bus::new();
        bus.insert(PiiPolicy::default());

        let guard = PiiScanGuard;
        let result = guard.run(
            "Hello, this is clean text".to_string(),
            &(),
            &mut bus,
        ).await;

        match result {
            Outcome::Next(text) => assert_eq!(text, "Hello, this is clean text"),
            _ => panic!("clean text should pass through"),
        }
    }

    /// Nested string fields are masked in place; clean fields are untouched.
    #[test]
    fn test_json_pii_scan() {
        let mut json = serde_json::json!({
            "name": "John",
            "email": "john@example.com",
            "nested": {
                "ssn": "123-45-6789"
            }
        });

        let patterns = pii_patterns();
        let detections = scan_json_value(
            &mut json,
            &patterns,
            MaskingStrategy::Redact,
        );

        assert_eq!(detections.len(), 2); // email + SSN
        assert_eq!(json["email"], "[REDACTED]");
        assert_eq!(json["nested"]["ssn"], "[REDACTED]");
    }
}

# See Also

  • Guard Patterns Cookbook — Guard composition patterns
  • Multi-Tenant Isolation Cookbook — tenant-scoped PII policies
  • Bus Access Patterns Cookbook — Bus policy configuration
  • LLM Gateway Cookbook — PII scanning for LLM inputs/outputs