#!/bin/bash
# ==============================================================================
# Test All Models Script for Jersey Detection
# ==============================================================================
# This script automatically tests all models defined in llama-swap-config.yaml
# with the jersey detection test suite.
#
# Usage:
#   ./test_all_models.sh
#   ./test_all_models.sh /path/to/images
#   RESIZE=2048 ./test_all_models.sh
#   RESIZE= ./test_all_models.sh          # disable resizing
#   OUTPUT_FILE=custom_results.jsonl ./test_all_models.sh
# ==============================================================================

# Note: We don't use 'set -e' here because we have explicit error handling
# in the test loop and want to give the user the option to continue on failures

# ==============================================================================
# Configuration Variables
# ==============================================================================

# Image directory containing test images
IMAGES_DIR="${1:-./test_images}"

# Prompt file to use for testing
PROMPT_FILE="${PROMPT_FILE:-jersey_prompt_with_confidence.txt}"

# Resize images to this max dimension (set to empty string to disable).
# The expansion deliberately omits the colon: '${RESIZE-1024}' applies the
# default only when RESIZE is unset, so an explicitly empty RESIZE survives
# and disables resizing. ('${RESIZE:-1024}' would overwrite it with 1024.)
RESIZE="${RESIZE-1024}"

# Output file for results
OUTPUT_FILE="${OUTPUT_FILE:-jersey_detection_results.jsonl}"

# llama-swap configuration file
LLAMA_SWAP_CONFIG="${LLAMA_SWAP_CONFIG:-llama-swap-config.yaml}"

# Server URL
SERVER_URL="${SERVER_URL:-http://localhost:8080}"

# ==============================================================================
# Color codes for output
# ==============================================================================
# Stored as real escape bytes ($'...') so they can be emitted with printf '%s'
# and never require backslash interpretation of the surrounding message.

RED=$'\033[0;31m'
GREEN=$'\033[0;32m'
YELLOW=$'\033[1;33m'
BLUE=$'\033[0;34m'
CYAN=$'\033[0;36m'
NC=$'\033[0m' # No Color

# ==============================================================================
# Helper Functions
# ==============================================================================
# Each helper takes the message as $1 and writes one coloured line to stdout.
# The message is printed with '%s', so backslashes in paths or model names are
# shown literally instead of being interpreted as escape sequences.

# Print $1 as a banner framed by two rule lines.
print_header() {
  local rule='============================================================================'
  printf '%s%s%s\n' "$CYAN" "$rule" "$NC"
  printf '%s%s%s\n' "$CYAN" "$1" "$NC"
  printf '%s%s%s\n' "$CYAN" "$rule" "$NC"
}

# Print $1 prefixed with a blue [INFO] tag.
print_info() {
  printf '%s[INFO]%s %s\n' "$BLUE" "$NC" "$1"
}

# Print $1 prefixed with a green [SUCCESS] tag.
print_success() {
  printf '%s[SUCCESS]%s %s\n' "$GREEN" "$NC" "$1"
}

# Print $1 prefixed with a red [ERROR] tag.
print_error() {
  printf '%s[ERROR]%s %s\n' "$RED" "$NC" "$1"
}

# Print $1 prefixed with a yellow [WARNING] tag.
print_warning() {
  printf '%s[WARNING]%s %s\n' "$YELLOW" "$NC" "$1"
}

# ==============================================================================
# Validation
# ==============================================================================
# Every failed pre-flight check prints an error and exits with status 1.

print_header "Jersey Detection - Test All Models"

# Check if images directory exists
if [[ ! -d "$IMAGES_DIR" ]]; then
  print_error "Image directory not found: $IMAGES_DIR"
  echo "Usage: $0 [images_directory]"
  exit 1
fi

# Check if prompt file exists
if [[ ! -f "$PROMPT_FILE" ]]; then
  print_error "Prompt file not found: $PROMPT_FILE"
  exit 1
fi

# Check if llama-swap config exists
if [[ ! -f "$LLAMA_SWAP_CONFIG" ]]; then
  print_error "llama-swap config not found: $LLAMA_SWAP_CONFIG"
  exit 1
fi

# Check if test script exists
if [[ ! -f "test_jersey_detection.py" ]]; then
  print_error "test_jersey_detection.py not found in current directory"
  exit 1
fi

# Check if server is running.
#   -f           treat HTTP error responses (>= 400) as failure, not success
#   --max-time   do not hang forever on an unresponsive endpoint
print_info "Checking if llama-swap server is running at $SERVER_URL..."
if ! curl -sf --max-time 10 "$SERVER_URL/health" > /dev/null 2>&1; then
  print_error "Cannot connect to llama-swap at $SERVER_URL"
  echo ""
  echo "Please start llama-swap first:"
  echo "  llama-swap --config $LLAMA_SWAP_CONFIG --listen localhost:8080"
  echo ""
  exit 1
fi
print_success "Server is running"

# ==============================================================================
# Extract model tags from YAML
# ==============================================================================

print_info "Extracting model tags from $LLAMA_SWAP_CONFIG..."
# Extract model IDs (keys under 'models:')

#######################################
# Print the model IDs declared in a llama-swap YAML config, one per line.
#
# This is a deliberately small line-based parser, not a full YAML parser.
# It only looks inside the top-level 'models:' mapping and only reports keys
# at the shallowest indentation level found there, so nested settings and
# keys of other top-level sections are never mistaken for models.
# Keys may be bare or wrapped in single/double quotes; a quoted key may
# contain ':'.
#
# Arguments: $1 - path to the YAML config file
# Outputs:   model IDs on stdout, one per line (nothing if none are found)
# Returns:   awk's exit status
#######################################
extract_model_tags() {
  awk -v sq="'" '
    { sub(/\r$/, "") }                      # tolerate CRLF line endings
    /^[ \t]*(#|$)/ { next }                 # skip blank and comment-only lines
    /^[^ \t]/ {                             # any top-level line opens/closes the section
      in_models = ($0 ~ /^models:[ \t]*(#.*)?$/)
      key_indent = 0
      next
    }
    !in_models { next }
    {
      match($0, /^[ \t]+/)
      indent = RLENGTH
      if (key_indent == 0) key_indent = indent   # first entry fixes the key level
      if (indent != key_indent) next             # deeper lines are model settings

      entry = substr($0, indent + 1)
      first = substr(entry, 1, 1)
      if (first == "\"" || first == sq) {
        # Quoted key: take everything up to the matching quote.
        rest = substr(entry, 2)
        close_at = index(rest, first)
        if (close_at == 0) next
        key = substr(rest, 1, close_at - 1)
      } else {
        # Bare key: ends at the first colon followed by whitespace or EOL.
        colon_at = match(entry, /:([ \t]|$)/)
        if (colon_at == 0) next
        key = substr(entry, 1, colon_at - 1)
        sub(/[ \t]+$/, "", key)
      }
      if (key != "") print key
    }
  ' "$1"
}

# Read the IDs straight into an array (one element per output line)
mapfile -t MODELS < <(extract_model_tags "$LLAMA_SWAP_CONFIG")
MODEL_COUNT=${#MODELS[@]}

if (( MODEL_COUNT == 0 )); then
  print_error "No model tags found in $LLAMA_SWAP_CONFIG"
  exit 1
fi

print_success "Found $MODEL_COUNT models to test"

# ==============================================================================
# Display Configuration
# ==============================================================================

echo ""
print_info "Test Configuration:"
echo "  Images directory: $IMAGES_DIR"
echo "  Prompt file:      $PROMPT_FILE"
echo "  Resize:           ${RESIZE:-Disabled}"
echo "  Output file:      $OUTPUT_FILE"
echo "  Server URL:       $SERVER_URL"
echo "  Models to test:   $MODEL_COUNT"
echo ""

# List all models
print_info "Models:"
for i in "${!MODELS[@]}"; do
  echo "  $((i + 1)). ${MODELS[$i]}"
done
echo ""

# ==============================================================================
# Confirmation
# ==============================================================================
# Anything other than a single 'y'/'Y' keypress (including EOF) cancels.

read -p "Continue with testing? (y/N) " -n 1 -r
echo
if [[ ! $REPLY =~ ^[Yy]$ ]]; then
  print_warning "Testing cancelled"
  exit 0
fi

# ==============================================================================
# Run Tests
# ==============================================================================

print_header "Starting Tests"

START_TIME=$(date +%s)
SUCCESSFUL=0
FAILED=0

for i in "${!MODELS[@]}"; do
  MODEL="${MODELS[$i]}"
  MODEL_NUM=$((i + 1))

  echo ""
  print_header "Testing Model $MODEL_NUM/$MODEL_COUNT: $MODEL"

  # Build the command as an argument array rather than a string passed to
  # eval: every value stays a single argument even if it contains spaces,
  # quotes or '$', and nothing in it is re-parsed by the shell.
  cmd=(
    python test_jersey_detection.py "$IMAGES_DIR" "$PROMPT_FILE"
    --model-tag "$MODEL"
    --output-file "$OUTPUT_FILE"
    --server-url "$SERVER_URL"
  )

  # Add resize if configured
  if [[ -n "$RESIZE" ]]; then
    cmd+=(--resize "$RESIZE")
  fi

  # %q renders each argument shell-quoted, so the logged line can be pasted
  # back into a shell verbatim.
  printf -v cmd_display '%q ' "${cmd[@]}"
  print_info "Running: ${cmd_display% }"
  echo ""

  # Run the test
  if "${cmd[@]}"; then
    print_success "Model $MODEL completed successfully"
    SUCCESSFUL=$((SUCCESSFUL + 1))
  else
    print_error "Model $MODEL failed"
    FAILED=$((FAILED + 1))

    # Ask if user wants to continue; only an explicit 'n'/'N' stops the run
    echo ""
    read -p "Continue with remaining models? (Y/n) " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Nn]$ ]]; then
      print_warning "Testing stopped by user"
      break
    fi
  fi

  # Show progress (suppressed after the final model)
  if (( MODEL_NUM < MODEL_COUNT )); then
    print_info "Progress: $MODEL_NUM/$MODEL_COUNT models completed"
  fi
done

# ==============================================================================
# Summary
# ==============================================================================

END_TIME=$(date +%s)
DURATION=$((END_TIME - START_TIME))
# Not named SECONDS: that is a special Bash variable which keeps counting
# after assignment, so it must not be reused as an ordinary variable.
ELAPSED_MINUTES=$((DURATION / 60))
ELAPSED_SECONDS=$((DURATION % 60))

echo ""
print_header "Testing Complete"
echo ""
print_info "Summary:"
echo "  Total models: $MODEL_COUNT"
echo "  Successful:   $SUCCESSFUL"
echo "  Failed:       $FAILED"
echo "  Total time:   ${ELAPSED_MINUTES}m ${ELAPSED_SECONDS}s"
echo ""

if (( SUCCESSFUL > 0 )); then
  print_success "Results saved to: $OUTPUT_FILE"
  echo ""
  print_info "Analyze results with:"
  echo "  python analyze_jersey_results.py $OUTPUT_FILE"
fi

echo ""

# Exit with error code if any tests failed
if (( FAILED > 0 )); then
  exit 1
fi

exit 0