Back to Blog

Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering | Episode 21

Celest Kim

Video: Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering | Episode 21 by Taught by Celeste AI - AI Coding Coach

Watch full page →

Build AI Apps with Python: Safe AI Agents — Input Validation and Output Filtering

Creating safe AI agents requires multiple layers of protection to prevent misuse and data leaks. This example demonstrates how to implement input validation to block dangerous keywords and enforce length limits, output filtering to redact sensitive information like SSNs and credit cards, and a tool allowlist to restrict which tools the agent can execute.

Code

import re

class SafeAIAgent:
  """Layered safety guardrails for an AI agent.

  Three independent protections:
    * validate_input -- enforces a length limit and rejects inputs containing
      blocked keywords (case-insensitive substring match).
    * filter_output  -- redacts sensitive data (SSNs, credit cards, emails)
      from output text.
    * execute_tool   -- denies any tool not on the configured allowlist.
  """

  def __init__(self, blocked_keywords=None, max_input_length=200, allowed_tools=None):
    """Initialize the agent's guardrail configuration.

    Args:
      blocked_keywords: Optional iterable of lowercase keywords that make an
        input unsafe. Defaults to {'hack', 'attack', 'exploit'}.
      max_input_length: Maximum allowed input length in characters.
      allowed_tools: Optional iterable of tool names the agent may execute.
        Defaults to {'calculator', 'weather'}.
    """
    # None sentinels avoid mutable default arguments; copy into fresh sets
    # so callers' iterables are not shared or mutated.
    self.blocked_keywords = (
      set(blocked_keywords) if blocked_keywords is not None
      else {'hack', 'attack', 'exploit'}
    )
    # Maximum allowed input length
    self.max_input_length = max_input_length
    # Precompiled regex patterns; every match is replaced with '[REDACTED]'.
    self.redaction_patterns = {
      'SSN': re.compile(r'\b\d{3}-\d{2}-\d{4}\b'),
      # 4 groups of 4 digits, optionally separated by '-' or ' '
      'CreditCard': re.compile(r'\b(?:\d{4}[- ]?){3}\d{4}\b'),
      'Email': re.compile(r'\b[\w.-]+@[\w.-]+\.\w+\b')
    }
    # Allowlist of permitted tools; everything else is denied.
    self.allowed_tools = (
      set(allowed_tools) if allowed_tools is not None
      else {'calculator', 'weather'}
    )

  def validate_input(self, user_input):
    """Validate user input against length and keyword rules.

    Returns:
      True if the input passes all checks.

    Raises:
      ValueError: If the input is too long or contains a blocked keyword.
    """
    # Check length limit first -- cheapest check.
    if len(user_input) > self.max_input_length:
      raise ValueError('Input exceeds maximum length.')
    # Block if any forbidden keyword is present (case-insensitive).
    # NOTE: substring match, so e.g. 'hackathon' is also rejected.
    lowered = user_input.lower()
    if any(keyword in lowered for keyword in self.blocked_keywords):
      raise ValueError('Input contains blocked keywords.')
    return True

  def filter_output(self, output):
    """Return *output* with all sensitive matches replaced by '[REDACTED]'."""
    # Only the compiled patterns are needed, not their labels.
    for pattern in self.redaction_patterns.values():
      output = pattern.sub('[REDACTED]', output)
    return output

  def execute_tool(self, tool_name):
    """Execute *tool_name* if it is allowlisted.

    Raises:
      PermissionError: If the tool is not on the allowlist.
    """
    if tool_name not in self.allowed_tools:
      raise PermissionError(f'Tool "{tool_name}" is not authorized.')
    return f'Executing {tool_name} tool...'

# Example usage and tests
agent = SafeAIAgent()

# Test 1: Valid input passes
try:
  agent.validate_input('What is the weather today?')
except Exception as e:
  print(f'Test 1 failed: {e}')
else:
  print('Test 1 passed: Input accepted.')

# Test 2: Blocked keyword rejected
try:
  agent.validate_input('How to hack a system?')
except ValueError:
  print('Test 2 passed: Dangerous input blocked.')
else:
  print('Test 2 failed: Dangerous input accepted.')

# Test 3: Output filtering redacts sensitive info
raw_output = 'User SSN: 123-45-6789, Email: user@example.com, Card: 1234-5678-9012-3456'
filtered = agent.filter_output(raw_output)
print('Test 3 output:', filtered)

# Test 4: Unauthorized tool denied
try:
  agent.execute_tool('file_manager')
except PermissionError:
  print('Test 4 passed: Unauthorized tool blocked.')
else:
  print('Test 4 failed: Unauthorized tool executed.')

Key Points

  • Input validation blocks dangerous keywords and enforces length limits to prevent unsafe requests.
  • Output filtering uses regex patterns to redact sensitive information like SSNs, credit cards, and emails.
  • A tool allowlist restricts which external tools the AI agent is permitted to run, enhancing security.
  • Layered guardrails help ensure the AI agent behaves safely and protects user data.