Quoting just to keep the original text; reporting the original post because of the spam links in its signature.
Here is a simple Python script that can be used to de-identify data by replacing sensitive information with placeholders:
import re
# Ordered (pattern, placeholder) redaction rules. Order matters:
#   * emails and phone numbers go first so their digits are not picked up by
#     the looser address pattern;
#   * addresses go before names, otherwise "123 Main Street" would be turned
#     into "123 [NAME] [NAME]" and the address pattern could never match.
# The placeholders are all-uppercase, so the name pattern (capital followed by
# lowercase letters) never re-matches a placeholder inserted by an earlier rule.
_REDACTION_RULES = (
    # Email addresses.
    (
        re.compile(r'\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b'),
        '[EMAIL]',
    ),
    # Ten-digit phone numbers with an optional "+N"/"+NN" country code and an
    # optionally parenthesised area code. Lookarounds are used instead of \b
    # because \b cannot match in front of "+" or "(".
    (
        re.compile(
            r'(?<!\w)(?:\+\d{1,2}[-.\s]?)?(?:\(\d{3}\)|\d{3})'
            r'[-.\s]?\d{3}[-.\s]?\d{4}(?!\w)'
        ),
        '[PHONE]',
    ),
    # Street-address heuristic: a number followed by two words.
    (re.compile(r'\b\d+\s\w+\s\w+\b'), '[ADDRESS]'),
    # Name heuristic: any capitalised word. Deliberately broad, so it also
    # redacts sentence-initial words and other proper nouns.
    (re.compile(r'\b[A-Z][a-z]+\b'), '[NAME]'),
    # Add more patterns and replacements for other sensitive information if needed.
)


def deidentify_text(text: str) -> str:
    """Replace sensitive-looking substrings of *text* with placeholders.

    The following substitutions are applied, in this order:

    * email addresses           -> ``[EMAIL]``
    * phone numbers             -> ``[PHONE]``
    * number + two words        -> ``[ADDRESS]``
    * capitalised words         -> ``[NAME]``

    The patterns are simple regex heuristics: they can both miss real
    identifiers and redact harmless text, so the result should not be
    treated as guaranteed-anonymous.

    Args:
        text: The input string to scrub.

    Returns:
        A copy of *text* with every match replaced by its placeholder.
    """
    for pattern, placeholder in _REDACTION_RULES:
        text = pattern.sub(placeholder, text)
    return text
# Example usage: scrub one sample sentence and print the result.
data = (
    "John Doe's email is john.doe@example.com "
    "and his phone number is +1 (123) 456-7890."
)
deidentified_data = deidentify_text(data)
print(deidentified_data)