#!/usr/bin/env bash
#
# Compare fine-tuned CLIP model against baseline CLIP for logo recognition.
#
# Runs test_logo_detection.py once per model with identical arguments, saves
# each run's combined stdout/stderr under comparison_results/, and writes a
# summary containing the metric lines found in both logs.
#
# Usage:
#   ./compare_finetuned_vs_baseline.sh
#   ./compare_finetuned_vs_baseline.sh --num-logos 100
#
# Environment: NUM_LOGOS, SEED, THRESHOLD, DETR_THRESHOLD, REFS_PER_LOGO,
# MARGIN, POSITIVE_SAMPLES and NEGATIVE_SAMPLES preset the corresponding
# defaults; command-line flags take precedence over them.
#
# Exit status: 0 when both test runs succeed, 1 otherwise.
#
# Provenance: file added by commit e5482a2d9e6fc32546fac8accabd46f42dbe2bf6
# ("Add script to compare fine-tuned vs baseline CLIP", Rick McEwen,
# 2026-01-05).

# pipefail matters here: every test run is piped through tee, and without it
# the pipeline would report tee's status and hide a failed run.
set -euo pipefail

# Default parameters (each overridable from the environment)
NUM_LOGOS="${NUM_LOGOS:-50}"
SEED="${SEED:-42}"
THRESHOLD="${THRESHOLD:-0.7}"
DETR_THRESHOLD="${DETR_THRESHOLD:-0.5}"
REFS_PER_LOGO="${REFS_PER_LOGO:-3}"
MARGIN="${MARGIN:-0.05}"
POSITIVE_SAMPLES="${POSITIVE_SAMPLES:-5}"
NEGATIVE_SAMPLES="${NEGATIVE_SAMPLES:-20}"

# Model paths
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Output files
TIMESTAMP=$(date +%Y%m%d_%H%M%S)
OUTPUT_DIR="comparison_results"
BASELINE_OUTPUT="${OUTPUT_DIR}/baseline_${TIMESTAMP}.txt"
FINETUNED_OUTPUT="${OUTPUT_DIR}/finetuned_${TIMESTAMP}.txt"
SUMMARY_OUTPUT="${OUTPUT_DIR}/comparison_summary_${TIMESTAMP}.txt"

# Formatting and parsing constants
readonly RULE="============================================================"
readonly THIN_RULE="------------------------------------------------------------"
readonly METRIC_PATTERN="(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)"

# Arguments shared by both test runs; filled in by main after option parsing.
TEST_ARGS=()

# Exit status of each test run (0 = success); set by main.
BASELINE_STATUS=0
FINETUNED_STATUS=0

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the help text to stdout.
#######################################
usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Options:
  -n, --num-logos NUM        Number of logos to test (default: 50)
  -s, --seed SEED            Random seed for reproducibility (default: 42)
  -t, --threshold VAL        Similarity threshold (default: 0.7)
  -d, --detr-threshold VAL   DETR threshold (default: 0.5)
  --refs-per-logo NUM        Reference images per logo (default: 3)
  --margin VAL               Margin for matching (default: 0.05)
  --positive-samples NUM     Positive samples setting (default: 5)
  --negative-samples NUM     Negative samples setting (default: 20)
  --finetuned-model PATH     Path to fine-tuned model
  -h, --help                 Show this help message
EOF
}

#######################################
# Abort with a clear message when an option has no value after it.
# Without this check 'shift 2' fails and 'set -e' ends the script silently.
# Arguments: the remaining command line, option first ("$@" of the caller)
#######################################
require_value() {
  (( $# >= 2 )) || die "Error: option $1 requires a value (see --help)"
}

#######################################
# Parse command-line options into the global parameter variables.
# Globals:   NUM_LOGOS, SEED, THRESHOLD, DETR_THRESHOLD, REFS_PER_LOGO,
#            MARGIN, POSITIVE_SAMPLES, NEGATIVE_SAMPLES, FINETUNED_MODEL
#            (written)
# Arguments: the script's command line
# Returns:   exits 0 after --help, exits 1 on an unknown or incomplete option
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -n | --num-logos)
        require_value "$@"
        NUM_LOGOS=$2
        shift 2
        ;;
      -s | --seed)
        require_value "$@"
        SEED=$2
        shift 2
        ;;
      -t | --threshold)
        require_value "$@"
        THRESHOLD=$2
        shift 2
        ;;
      -d | --detr-threshold)
        require_value "$@"
        DETR_THRESHOLD=$2
        shift 2
        ;;
      --refs-per-logo)
        require_value "$@"
        REFS_PER_LOGO=$2
        shift 2
        ;;
      --margin)
        require_value "$@"
        MARGIN=$2
        shift 2
        ;;
      --positive-samples)
        require_value "$@"
        POSITIVE_SAMPLES=$2
        shift 2
        ;;
      --negative-samples)
        require_value "$@"
        NEGATIVE_SAMPLES=$2
        shift 2
        ;;
      --finetuned-model)
        require_value "$@"
        FINETUNED_MODEL=$2
        shift 2
        ;;
      -h | --help)
        usage
        exit 0
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done
}

#######################################
# Print a title framed by two rules.
# Arguments: $1 - title text
#######################################
print_banner() {
  echo "${RULE}"
  echo "$1"
  echo "${RULE}"
}

#######################################
# Print the effective parameters, model names and output paths.
#######################################
print_config() {
  print_banner "CLIP Logo Recognition: Fine-tuned vs Baseline Comparison"
  echo ""
  echo "Parameters:"
  echo "  Number of logos:  ${NUM_LOGOS}"
  echo "  Random seed:      ${SEED}"
  echo "  Threshold:        ${THRESHOLD}"
  echo "  DETR threshold:   ${DETR_THRESHOLD}"
  echo "  Refs per logo:    ${REFS_PER_LOGO}"
  echo "  Margin:           ${MARGIN}"
  echo "  Positive samples: ${POSITIVE_SAMPLES}"
  echo "  Negative samples: ${NEGATIVE_SAMPLES}"
  echo ""
  echo "Models:"
  echo "  Baseline:   ${BASELINE_MODEL}"
  echo "  Fine-tuned: ${FINETUNED_MODEL}"
  echo ""
  echo "Output:"
  echo "  Baseline results:   ${BASELINE_OUTPUT}"
  echo "  Fine-tuned results: ${FINETUNED_OUTPUT}"
  echo "  Summary:            ${SUMMARY_OUTPUT}"
  echo ""
}

#######################################
# Run the test suite against one model, mirroring its output to a file.
# Globals:   TEST_ARGS (read)
# Arguments: $1 - display label, e.g. "Baseline"
#            $2 - model name or path passed to the test via -e
#            $3 - file receiving the run's combined stdout/stderr
# Returns:   the status of the test pipeline (non-zero if the run failed)
#######################################
run_model_test() {
  local label=$1
  local model=$2
  local output=$3
  local upper_label
  local rc=0

  upper_label=$(printf '%s' "${label}" | LC_ALL=C tr '[:lower:]' '[:upper:]')

  print_banner "Testing ${upper_label} model: ${model}"
  echo ""

  # Capture the status instead of letting 'set -e' abort, so that whatever
  # output exists is still summarised and the failure is reported at the end.
  uv run python test_logo_detection.py \
    "${TEST_ARGS[@]}" \
    -e "${model}" \
    2>&1 | tee "${output}" || rc=$?

  echo ""
  echo "${label} results saved to: ${output}"
  echo ""

  return "${rc}"
}

#######################################
# Print up to six metric lines from a results file.
# Arguments: $1 - results file
#            $2 - exit status of the run that produced the file
# Outputs:   the matching lines, or a placeholder when there are none
#######################################
print_metrics() {
  local results_file=$1
  local run_status=$2
  local metrics

  # grep exits non-zero on "no match" or "unreadable file"; both simply mean
  # there is nothing to show, so the status is deliberately ignored.
  metrics=$(grep -m 6 -E -- "${METRIC_PATTERN}" "${results_file}" 2>/dev/null) || true

  if [[ -n "${metrics}" ]]; then
    printf '%s\n' "${metrics}"
  else
    echo "  (no metrics found in ${results_file})"
  fi
  if (( run_status != 0 )); then
    echo "  WARNING: test run exited with status ${run_status}"
  fi
}

#######################################
# Extract the first percentage on the first "F1 Score" line of a file.
# Arguments: $1 - results file
# Outputs:   the percentage (e.g. "87.50%"), or "N/A" when none is found
#######################################
extract_f1() {
  local results_file=$1
  local -r pct_re='[0-9]+\.[0-9]+%'
  local line=""
  local f1=""

  line=$(grep -m 1 -- "F1 Score" "${results_file}" 2>/dev/null) || true
  if [[ "${line}" =~ ${pct_re} ]]; then
    f1=${BASH_REMATCH[0]}
  fi

  printf '%s\n' "${f1:-N/A}"
}

#######################################
# Print the complete comparison summary to stdout.
# Globals: parameter, model, output-path and *_STATUS variables (read)
#######################################
print_summary() {
  local baseline_f1
  local finetuned_f1

  baseline_f1=$(extract_f1 "${BASELINE_OUTPUT}")
  finetuned_f1=$(extract_f1 "${FINETUNED_OUTPUT}")

  print_banner "COMPARISON SUMMARY"
  echo ""
  echo "Test Parameters:"
  echo "  Logos: ${NUM_LOGOS}, Seed: ${SEED}, Threshold: ${THRESHOLD}"
  echo "  Method: multi-ref, Refs/logo: ${REFS_PER_LOGO}, Margin: ${MARGIN}"
  echo ""

  echo "BASELINE (${BASELINE_MODEL}):"
  print_metrics "${BASELINE_OUTPUT}" "${BASELINE_STATUS}"
  echo ""

  echo "FINE-TUNED (${FINETUNED_MODEL}):"
  print_metrics "${FINETUNED_OUTPUT}" "${FINETUNED_STATUS}"
  echo ""

  echo "${THIN_RULE}"
  echo "F1 SCORE COMPARISON:"
  echo "  Baseline:   ${baseline_f1}"
  echo "  Fine-tuned: ${finetuned_f1}"
  echo "${THIN_RULE}"
  echo ""
  echo "Full results saved to: ${OUTPUT_DIR}/"
}

#######################################
# Entry point: parse options, run both tests, write the summary.
# Arguments: the script's command line
#######################################
main() {
  parse_args "$@"

  # Check the precondition before creating anything on disk.
  if [[ ! -d "${FINETUNED_MODEL}" ]]; then
    echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
    echo "Please train the model first using: uv run python train_clip_logo.py --config configs/jetson_orin.yaml" >&2
    exit 1
  fi

  mkdir -p -- "${OUTPUT_DIR}"

  print_config

  # NOTE(review): --clear-cache is sent on every run, presumably so cached
  # data from one model is never reused for the other - confirm against
  # test_logo_detection.py.
  TEST_ARGS=(
    -n "${NUM_LOGOS}"
    -s "${SEED}"
    -t "${THRESHOLD}"
    -d "${DETR_THRESHOLD}"
    --refs-per-logo "${REFS_PER_LOGO}"
    --margin "${MARGIN}"
    --positive-samples "${POSITIVE_SAMPLES}"
    --negative-samples "${NEGATIVE_SAMPLES}"
    --matching-method multi-ref
    --clear-cache
  )

  run_model_test "Baseline" "${BASELINE_MODEL}" "${BASELINE_OUTPUT}" \
    || BASELINE_STATUS=$?
  run_model_test "Fine-tuned" "${FINETUNED_MODEL}" "${FINETUNED_OUTPUT}" \
    || FINETUNED_STATUS=$?

  # One tee for the whole summary keeps the file and the terminal identical.
  print_summary | tee "${SUMMARY_OUTPUT}"
  echo ""

  if (( BASELINE_STATUS != 0 || FINETUNED_STATUS != 0 )); then
    echo "Error: at least one test run failed (baseline=${BASELINE_STATUS}, fine-tuned=${FINETUNED_STATUS}); results above may be incomplete." >&2
    exit 1
  fi

  echo "Done!"
}

main "$@"