#!/bin/sh
#
# Pre-commit hook to check for sensitive information
# 
# To install this hook, run:
#   git config core.hooksPath .git-hooks
#

# Prefer the Python implementation of this hook when an interpreter exists.
# Candidates are probed in order (python3 first, then python); the first one
# found on PATH runs pre-commit.py from this script's directory and the hook
# exits with that interpreter's status. If neither is found, execution
# continues with the shell implementation below.
for PY_BIN in python3 python; do
    if command -v "$PY_BIN" >/dev/null 2>&1; then
        "$PY_BIN" "$(dirname "$0")/pre-commit.py"
        exit $?
    fi
done

# Fallback to shell script (no colors on Windows CMD)
echo "Running pre-commit checks..."

# Set to 1 by any later check that finds a problem; read by the final verdict.
ISSUES_FOUND=0

# Newline-separated list of staged paths to inspect.
#   --diff-filter=ACMR      keep Added, Copied, Modified and Renamed entries
#                           (deleted files have nothing to scan). Renames are
#                           included so that a file which is renamed and
#                           edited in the same commit is still checked.
#   -c core.quotePath=false print non-ASCII path names verbatim instead of as
#                           quoted octal escapes, so the paths can be opened.
STAGED_FILES=$(git -c core.quotePath=false diff --cached --name-only --diff-filter=ACMR)

# Nothing staged that needs checking: let the commit proceed.
if [ -z "$STAGED_FILES" ]; then
    echo "No files to check"
    exit 0
fi

echo "Checking staged files for sensitive information..."

# Scan one file against every secret-detection pattern.
#
# Arguments: $1 - path of the file to scan
# Outputs:   for each pattern that matches, a report on stdout consisting of
#            the file name, the pattern, and the matching lines with their
#            line numbers
# Returns:   0 if at least one pattern matched, 1 otherwise
#
# The patterns are POSIX extended regular expressions, matched
# case-insensitively. They are kept in a quoted here-document rather than a
# bash array so the script stays valid under #!/bin/sh; lines that are blank
# or start with '#' are skipped. [[:space:]] is used instead of the
# non-POSIX \s escape.
scan_file_for_secrets() {
    scan_status=1
    while IFS= read -r scan_pattern; do
        case "$scan_pattern" in
            '' | '#'*) continue ;;
        esac
        # -e / -- keep an odd pattern or file name from being parsed as an option.
        scan_matches=$(grep -inE -e "$scan_pattern" -- "$1" 2>/dev/null)
        if [ -n "$scan_matches" ]; then
            printf '%s\n' "X Potential sensitive information found in: $1"
            printf '%s\n' "  Pattern matched: $scan_pattern"
            printf '%s\n' "$scan_matches"
            scan_status=0
        fi
    done <<'EOF'
# API Keys and Tokens
api[_-]?key['"]?[[:space:]]*[:=][[:space:]]*['"][a-zA-Z0-9_-]{20,}['"]
token['"]?[[:space:]]*[:=][[:space:]]*['"][a-zA-Z0-9_-]{20,}['"]
secret['"]?[[:space:]]*[:=][[:space:]]*['"][a-zA-Z0-9_-]{20,}['"]

# AWS Credentials
AKIA[0-9A-Z]{16}
aws[_-]?access[_-]?key
aws[_-]?secret

# Private Keys
BEGIN.*PRIVATE KEY
BEGIN RSA PRIVATE KEY

# Passwords
password['"]?[[:space:]]*[:=][[:space:]]*['"][^'"]{8,}['"]
passwd['"]?[[:space:]]*[:=][[:space:]]*['"][^'"]{8,}['"]
EOF
    return "$scan_status"
}

# Check each staged file. The list is read line by line from a here-document
# (not word-split) so paths containing spaces or glob characters stay intact,
# and the loop runs in the current shell so ISSUES_FOUND survives it.
# NOTE: this scans the working-tree copy of each path, not the staged blob.
while IFS= read -r FILE; do
    [ -n "$FILE" ] || continue

    # Skip .git-hooks directory (contains pattern definitions)
    case "$FILE" in
        .git-hooks/*) continue ;;
    esac

    # Only scan files that file(1) describes as text. The "name:" prefix is
    # stripped first so that a file *named* e.g. context.bin is not mistaken
    # for text; "./" keeps a leading '-' in the path from looking like an option.
    FILE_TYPE=$(file "./$FILE" 2>/dev/null)
    FILE_TYPE=${FILE_TYPE#"./$FILE:"}
    case "$FILE_TYPE" in
        *text*)
            if scan_file_for_secrets "$FILE"; then
                ISSUES_FOUND=1
            fi
            ;;
    esac
done <<EOF
$STAGED_FILES
EOF

# Decide whether a path names a file that should never be committed.
#
# Arguments: $1 - path as listed by git (may include directories)
# Returns:   0 if the file's base name matches a sensitive-name glob,
#            1 otherwise
#
# Matching uses shell glob patterns in a case statement against the base
# name only. The names were previously passed to grep -E, where a glob such
# as "*.key" is not a valid/equivalent regular expression and could flag
# unrelated files or fail to match at all depending on the grep in use.
is_sensitive_filename() {
    case "${1##*/}" in
        *.key | *.pem | *.env | .env.local | secrets.json | config.local.* | credentials.*)
            return 0
            ;;
    esac
    return 1
}

# Check for common sensitive file names. Paths are read one per line from a
# here-document so names with spaces are not split, and so ISSUES_FOUND is
# assigned in the current shell. Each offending file is reported once.
while IFS= read -r FILE; do
    [ -n "$FILE" ] || continue
    if is_sensitive_filename "$FILE"; then
        printf '%s\n' "X Sensitive file detected: $FILE"
        echo "  This file type should not be committed"
        ISSUES_FOUND=1
    fi
done <<EOF
$STAGED_FILES
EOF

# Flag the commit if any staged path contains the string "__pycache__".
# A case substring match over the whole newline-separated list is used, so no
# external process is needed.
case "$STAGED_FILES" in
    *__pycache__*)
        echo "X __pycache__ directory found in staged files"
        echo "  Python cache files should not be committed"
        ISSUES_FOUND=1
        ;;
esac

# Final verdict: when any check set ISSUES_FOUND to 1, print the failure
# banner and exit 1 so git aborts the commit; otherwise report success and
# exit 0.
if [ "$ISSUES_FOUND" -eq 1 ]; then
    printf '%s\n' \
        "" \
        "========================================" \
        "X Pre-commit check FAILED" \
        "  Please review and remove sensitive information" \
        "  To bypass this check (NOT recommended): git commit --no-verify" \
        "========================================"
    exit 1
fi

echo "Pre-commit check PASSED"
exit 0
