Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering | Episode 21
Video: Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering | Episode 21, taught by Celeste AI - AI Coding Coach
Watch full page → Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering
Creating safe AI agents requires multiple layers of protection to prevent misuse and data leaks. This example demonstrates how to implement input validation to block dangerous keywords and enforce length limits, output filtering to redact sensitive information like SSNs and credit cards, and a tool allowlist to restrict which tools the agent can execute.
Code
import re
class SafeAIAgent:
    """AI agent with layered safety guardrails.

    Three independent protections are applied:
    input validation (length limit + blocked keywords), output
    filtering (regex redaction of sensitive data), and a tool
    allowlist restricting which tools may be executed.
    """

    def __init__(self):
        # Blocked keywords that indicate unsafe requests; matched as
        # case-insensitive substrings of the input.
        self.blocked_keywords = {'hack', 'attack', 'exploit'}
        # Maximum allowed input length, in characters.
        self.max_input_length = 200
        # Pre-compiled regex patterns used to redact sensitive info
        # from output (compiled once here, reused on every call).
        self.redaction_patterns = {
            'SSN': re.compile(r'\b\d{3}-\d{2}-\d{4}\b'),
            'CreditCard': re.compile(r'\b(?:\d{4}[- ]?){3}\d{4}\b'),
            'Email': re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b'),
        }
        # Allowlist of tools the agent is permitted to run.
        self.allowed_tools = {'calculator', 'weather'}

    def validate_input(self, user_input):
        """Validate *user_input* and return True if it is safe.

        Raises:
            ValueError: if the input exceeds the length limit or
                contains any blocked keyword (case-insensitive).
        """
        if len(user_input) > self.max_input_length:
            raise ValueError('Input exceeds maximum length.')
        lowered = user_input.lower()
        if any(keyword in lowered for keyword in self.blocked_keywords):
            raise ValueError('Input contains blocked keywords.')
        return True

    def filter_output(self, output):
        """Return *output* with all sensitive matches replaced by '[REDACTED]'."""
        # Only the compiled patterns are needed, not their labels.
        for pattern in self.redaction_patterns.values():
            output = pattern.sub('[REDACTED]', output)
        return output

    def execute_tool(self, tool_name):
        """Run *tool_name* if allowlisted; otherwise raise PermissionError."""
        if tool_name not in self.allowed_tools:
            raise PermissionError(f'Tool "{tool_name}" is not authorized.')
        return f'Executing {tool_name} tool...'
# Example usage and smoke tests for the SafeAIAgent guardrails.
agent = SafeAIAgent()

# Test 1: Valid input passes validation.
try:
    agent.validate_input('What is the weather today?')
    print('Test 1 passed: Input accepted.')
except Exception as e:
    print(f'Test 1 failed: {e}')

# Test 2: Input containing a blocked keyword is rejected.
try:
    agent.validate_input('How to hack a system?')
    print('Test 2 failed: Dangerous input accepted.')
except ValueError:
    print('Test 2 passed: Dangerous input blocked.')

# Test 3: Output filtering redacts SSN, email, and card number.
raw_output = 'User SSN: 123-45-6789, Email: user@example.com, Card: 1234-5678-9012-3456'
filtered = agent.filter_output(raw_output)
print('Test 3 output:', filtered)

# Test 4: A tool outside the allowlist is denied.
try:
    agent.execute_tool('file_manager')
    print('Test 4 failed: Unauthorized tool executed.')
except PermissionError:
    print('Test 4 passed: Unauthorized tool blocked.')
Key Points
- Input validation blocks dangerous keywords and enforces length limits to prevent unsafe requests.
- Output filtering uses regex patterns to redact sensitive information like SSNs, credit cards, and emails.
- A tool allowlist restricts which external tools the AI agent is permitted to run, enhancing security.
- Layered guardrails help ensure the AI agent behaves safely and protects user data.