#!/bin/bash
#
# Compare fine-tuned CLIP model against baseline CLIP for logo recognition.
#
# This script runs the same test suite on both models and outputs results
# for easy comparison.
#
# Usage:
#   ./compare_finetuned_vs_baseline.sh
#   ./compare_finetuned_vs_baseline.sh --num-logos 100
#
# All defaults below may also be overridden via the environment; a few
# (DETR_THRESHOLD, POSITIVE_SAMPLES, NEGATIVE_SAMPLES) have no CLI flag
# and are environment-only.

# Abort on command failure, unset variables, and — crucially — failures in
# any pipeline stage: every test run is piped through `tee`, and without
# `pipefail` a failing test would be masked by tee's exit status.
set -euo pipefail

# Default parameters
NUM_LOGOS="${NUM_LOGOS:-50}"
SEED="${SEED:-42}"
THRESHOLD="${THRESHOLD:-0.7}"
DETR_THRESHOLD="${DETR_THRESHOLD:-0.5}"
REFS_PER_LOGO="${REFS_PER_LOGO:-3}"
MARGIN="${MARGIN:-0.05}"
POSITIVE_SAMPLES="${POSITIVE_SAMPLES:-5}"
NEGATIVE_SAMPLES="${NEGATIVE_SAMPLES:-20}"

# Model paths
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Output files (timestamped so repeated runs never clobber earlier results)
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_DIR="comparison_results"
BASELINE_OUTPUT="${OUTPUT_DIR}/baseline_${TIMESTAMP}.txt"
FINETUNED_OUTPUT="${OUTPUT_DIR}/finetuned_${TIMESTAMP}.txt"
SUMMARY_OUTPUT="${OUTPUT_DIR}/comparison_summary_${TIMESTAMP}.txt"

# Parse command line arguments.
# ${2:?...} aborts with a clear message when a flag is given without a value
# (previously `shift 2` would silently mis-parse or die cryptically).
while [[ $# -gt 0 ]]; do
  case $1 in
    -n|--num-logos)
      NUM_LOGOS="${2:?missing value for $1}"
      shift 2
      ;;
    -s|--seed)
      SEED="${2:?missing value for $1}"
      shift 2
      ;;
    -t|--threshold)
      THRESHOLD="${2:?missing value for $1}"
      shift 2
      ;;
    --refs-per-logo)
      REFS_PER_LOGO="${2:?missing value for $1}"
      shift 2
      ;;
    --margin)
      MARGIN="${2:?missing value for $1}"
      shift 2
      ;;
    --finetuned-model)
      FINETUNED_MODEL="${2:?missing value for $1}"
      shift 2
      ;;
    -h|--help)
      echo "Usage: $0 [OPTIONS]"
      echo ""
      echo "Options:"
      echo "  -n, --num-logos NUM      Number of logos to test (default: 50)"
      echo "  -s, --seed SEED          Random seed for reproducibility (default: 42)"
      echo "  -t, --threshold VAL      Similarity threshold (default: 0.7)"
      echo "  --refs-per-logo NUM      Reference images per logo (default: 3)"
      echo "  --margin VAL             Margin for matching (default: 0.05)"
      echo "  --finetuned-model PATH   Path to fine-tuned model"
      echo "  -h, --help               Show this help message"
      exit 0
      ;;
    *)
      # Diagnostics belong on stderr.
      echo "Unknown option: $1" >&2
      exit 1
      ;;
  esac
done

# Create output directory
mkdir -p "${OUTPUT_DIR}"

# Check if fine-tuned model exists
if [ ! -d "${FINETUNED_MODEL}" ]; then
  echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
  echo "Please train the model first using: uv run python train_clip_logo.py --config configs/jetson_orin.yaml" >&2
  exit 1
fi

echo "============================================================"
echo "CLIP Logo Recognition: Fine-tuned vs Baseline Comparison"
echo "============================================================"
echo ""
echo "Parameters:"
echo "  Number of logos:    ${NUM_LOGOS}"
echo "  Random seed:        ${SEED}"
echo "  Threshold:          ${THRESHOLD}"
echo "  DETR threshold:     ${DETR_THRESHOLD}"
echo "  Refs per logo:      ${REFS_PER_LOGO}"
echo "  Margin:             ${MARGIN}"
echo "  Positive samples:   ${POSITIVE_SAMPLES}"
echo "  Negative samples:   ${NEGATIVE_SAMPLES}"
echo ""
echo "Models:"
echo "  Baseline:   ${BASELINE_MODEL}"
echo "  Fine-tuned: ${FINETUNED_MODEL}"
echo ""
echo "Output:"
echo "  Baseline results:   ${BASELINE_OUTPUT}"
echo "  Fine-tuned results: ${FINETUNED_OUTPUT}"
echo "  Summary:            ${SUMMARY_OUTPUT}"
echo ""

# Common test arguments — kept in an array so values with spaces survive.
TEST_ARGS=(
  -n "${NUM_LOGOS}"
  -s "${SEED}"
  -t "${THRESHOLD}"
  -d "${DETR_THRESHOLD}"
  --refs-per-logo "${REFS_PER_LOGO}"
  --margin "${MARGIN}"
  --positive-samples "${POSITIVE_SAMPLES}"
  --negative-samples "${NEGATIVE_SAMPLES}"
  --matching-method multi-ref
  --clear-cache
)

# Run baseline test.
# With `pipefail` set above, a failing test run now aborts the script
# instead of silently producing a meaningless comparison.
echo "============================================================"
echo "Testing BASELINE model: ${BASELINE_MODEL}"
echo "============================================================"
echo ""
uv run python test_logo_detection.py \
  "${TEST_ARGS[@]}" \
  -e "${BASELINE_MODEL}" \
  2>&1 | tee "${BASELINE_OUTPUT}"
echo ""
echo "Baseline results saved to: ${BASELINE_OUTPUT}"
echo ""

# Run fine-tuned test
echo "============================================================"
echo "Testing FINE-TUNED model: ${FINETUNED_MODEL}"
echo "============================================================"
echo ""
uv run python test_logo_detection.py \
  "${TEST_ARGS[@]}" \
  -e "${FINETUNED_MODEL}" \
  2>&1 | tee "${FINETUNED_OUTPUT}"
echo ""
echo "Fine-tuned results saved to: ${FINETUNED_OUTPUT}"
echo ""

# Extract and compare key metrics.
# The grep pipelines below are best-effort: `grep | head` can exit non-zero
# (no match, or SIGPIPE once head has its 6 lines), and under pipefail that
# would kill the script — hence the explicit `|| true` guards.
echo "============================================================"
echo "COMPARISON SUMMARY"
echo "============================================================" | tee "${SUMMARY_OUTPUT}"
echo "" | tee -a "${SUMMARY_OUTPUT}"
echo "Test Parameters:" | tee -a "${SUMMARY_OUTPUT}"
echo "  Logos: ${NUM_LOGOS}, Seed: ${SEED}, Threshold: ${THRESHOLD}" | tee -a "${SUMMARY_OUTPUT}"
echo "  Method: multi-ref, Refs/logo: ${REFS_PER_LOGO}, Margin: ${MARGIN}" | tee -a "${SUMMARY_OUTPUT}"
echo "" | tee -a "${SUMMARY_OUTPUT}"

echo "BASELINE (${BASELINE_MODEL}):" | tee -a "${SUMMARY_OUTPUT}"
grep -E "(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)" "${BASELINE_OUTPUT}" \
  | head -6 | tee -a "${SUMMARY_OUTPUT}" || true
echo "" | tee -a "${SUMMARY_OUTPUT}"

echo "FINE-TUNED (${FINETUNED_MODEL}):" | tee -a "${SUMMARY_OUTPUT}"
grep -E "(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)" "${FINETUNED_OUTPUT}" \
  | head -6 | tee -a "${SUMMARY_OUTPUT}" || true
echo "" | tee -a "${SUMMARY_OUTPUT}"

# Extract F1 scores for quick comparison (falls back to "N/A" when the
# metric line or a percentage is absent from a result file).
BASELINE_F1=$(grep "F1 Score" "${BASELINE_OUTPUT}" | head -1 | grep -oE "[0-9]+\.[0-9]+%" | head -1 || echo "N/A")
FINETUNED_F1=$(grep "F1 Score" "${FINETUNED_OUTPUT}" | head -1 | grep -oE "[0-9]+\.[0-9]+%" | head -1 || echo "N/A")

echo "------------------------------------------------------------" | tee -a "${SUMMARY_OUTPUT}"
echo "F1 SCORE COMPARISON:" | tee -a "${SUMMARY_OUTPUT}"
echo "  Baseline:   ${BASELINE_F1}" | tee -a "${SUMMARY_OUTPUT}"
echo "  Fine-tuned: ${FINETUNED_F1}" | tee -a "${SUMMARY_OUTPUT}"
echo "------------------------------------------------------------" | tee -a "${SUMMARY_OUTPUT}"
echo "" | tee -a "${SUMMARY_OUTPUT}"
echo "Full results saved to: ${OUTPUT_DIR}/" | tee -a "${SUMMARY_OUTPUT}"
echo ""
echo "Done!"