• quoting just to keep original text - reporting original due to spam signature links

    wrote:

    Here is a simple Python script that can be used to de-identify data by replacing sensitive information with placeholders:

    import re

    def deidentify_text(text: str) -> str:
        """Return *text* with sensitive-looking substrings replaced by placeholders.

        The rules are regex heuristics, applied in a deliberate order:

        1. email addresses        -> ``[EMAIL]``
        2. 10-digit phone numbers -> ``[PHONE]`` (optional ``+`` country code of
           one or two digits, optional parentheses around the area code)
        3. street-style addresses -> ``[ADDRESS]`` (a number followed by two words)
        4. capitalized words      -> ``[NAME]``

        The name and address rules are intentionally broad: any capitalized
        word is treated as a name, and any "<digits> <word> <word>" run is
        treated as an address, so ordinary text may be over-redacted.

        Args:
            text: The string to scrub.

        Returns:
            A new string with every match replaced by its placeholder.
        """
        # Emails first, so the later name rule cannot rewrite capitalized parts
        # of an address such as "John.Doe@Example.com".
        text = re.sub(
            r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b',
            '[EMAIL]',
            text,
        )

        # Phone numbers. A leading "\b" cannot match in front of "+" or "("
        # (both are non-word characters), so a negative lookbehind is used to
        # anchor the start instead; this lets "+1 (123) 456-7890" match whole.
        text = re.sub(
            r'(?<!\w)(?:\+\d{1,2}[-.\s]?)?(?:\(\d{3}\)|\d{3})[-.\s]?\d{3}[-.\s]?\d{4}\b',
            '[PHONE]',
            text,
        )

        # Addresses must be handled after phone numbers (otherwise
        # "123 456 7890" would be taken for an address) and before names
        # (otherwise "12 Main Street" becomes "12 [NAME] [NAME]", which this
        # pattern can no longer match, leaking the house number).
        text = re.sub(r'\b\d+\s\w+\s\w+\b', '[ADDRESS]', text)

        # Names last. The placeholders inserted above are all upper-case, so
        # this pattern (capital letter followed by lower-case letters) leaves
        # them untouched.
        text = re.sub(r'\b[A-Z][a-z]+\b', '[NAME]', text)

        # Add more patterns and replacements for other sensitive information if needed

        return text

    # Example usage: scrub a sample sentence that contains a name, an email
    # address and a phone number, then print the de-identified result.
    data = "John Doe's email is john.doe@example.com and his phone number is +1 (123) 456-7890."
    deidentified_data = deidentify_text(text=data)
    print(deidentified_data)