From e5482a2d9e6fc32546fac8accabd46f42dbe2bf6 Mon Sep 17 00:00:00 2001
From: Rick McEwen <mcewen@flgator.com>
Date: Mon, 5 Jan 2026 11:43:47 -0500
Subject: [PATCH] Add script to compare fine-tuned vs baseline CLIP

---
 compare_finetuned_vs_baseline.sh | 191 +++++++++++++++++++++++++++++++
 1 file changed, 191 insertions(+)
 create mode 100755 compare_finetuned_vs_baseline.sh

diff --git a/compare_finetuned_vs_baseline.sh b/compare_finetuned_vs_baseline.sh
new file mode 100755
index 0000000..1e016f3
--- /dev/null
+++ b/compare_finetuned_vs_baseline.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+#
+# Compare fine-tuned CLIP model against baseline CLIP for logo recognition.
+#
+# This script runs the same test suite on both models and outputs results
+# for easy comparison.
+#
+# Usage:
+#   ./compare_finetuned_vs_baseline.sh
+#   ./compare_finetuned_vs_baseline.sh --num-logos 100
+#
+
+set -o errexit
+
+# Tunable test parameters: a non-empty value already present in the environment wins, otherwise the default applies.
+: "${NUM_LOGOS:=50}"
+: "${SEED:=42}"
+: "${THRESHOLD:=0.7}"
+: "${DETR_THRESHOLD:=0.5}"
+: "${REFS_PER_LOGO:=3}"
+: "${MARGIN:=0.05}"
+: "${POSITIVE_SAMPLES:=5}"
+: "${NEGATIVE_SAMPLES:=20}"
+
+# Embedding models under comparison (the fine-tuned path can be changed with --finetuned-model)
+BASELINE_MODEL='openai/clip-vit-large-patch14'
+FINETUNED_MODEL='models/logo_detection/clip_finetuned'
+
+# Result files; the timestamp in each name keeps the files of separate runs apart
+OUTPUT_DIR='comparison_results'
+TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
+BASELINE_OUTPUT="${OUTPUT_DIR}/baseline_${TIMESTAMP}.txt"
+FINETUNED_OUTPUT="${OUTPUT_DIR}/finetuned_${TIMESTAMP}.txt"
+SUMMARY_OUTPUT="${OUTPUT_DIR}/comparison_summary_${TIMESTAMP}.txt"
+
+# Parse command line arguments. "${2?...}" reports a missing option value on stderr and exits, instead of a silent `shift 2` failure under `set -e`.
+while [[ $# -gt 0 ]]; do
+    case "$1" in
+        -n|--num-logos)
+            NUM_LOGOS="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        -s|--seed)
+            SEED="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        -t|--threshold)
+            THRESHOLD="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        --refs-per-logo)
+            REFS_PER_LOGO="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        --margin)
+            MARGIN="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        --finetuned-model)
+            FINETUNED_MODEL="${2?option $1 requires a value}"
+            shift 2
+            ;;
+        -h|--help)
+            echo "Usage: $0 [OPTIONS]"
+            echo ""
+            echo "Options:"
+            echo "  -n, --num-logos NUM      Number of logos to test (default: 50)"
+            echo "  -s, --seed SEED          Random seed for reproducibility (default: 42)"
+            echo "  -t, --threshold VAL      Similarity threshold (default: 0.7)"
+            echo "  --refs-per-logo NUM      Reference images per logo (default: 3)"
+            echo "  --margin VAL             Margin for matching (default: 0.05)"
+            echo "  --finetuned-model PATH   Path to fine-tuned model"
+            echo "  -h, --help               Show this help message"
+            exit 0
+            ;;
+        *)
+            echo "Unknown option: $1" >&2
+            exit 1
+            ;;
+    esac
+done
+
+# Validate the fine-tuned model directory first so a failed check leaves no empty output directory behind; errors go to stderr.
+if [[ ! -d "${FINETUNED_MODEL}" ]]; then
+    echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
+    echo "Please train the model first using: uv run python train_clip_logo.py --config configs/jetson_orin.yaml" >&2
+    exit 1
+fi
+
+# Create output directory
+mkdir -p "${OUTPUT_DIR}"
+
+# Show the run banner: title, effective parameters, models under test, and result file paths.
+# One printf call emits one output line per argument ("" yields an empty line).
+printf '%s\n' \
+    "============================================================" \
+    "CLIP Logo Recognition: Fine-tuned vs Baseline Comparison" \
+    "============================================================" \
+    "" "Parameters:" \
+    "  Number of logos:    ${NUM_LOGOS}" \
+    "  Random seed:        ${SEED}" \
+    "  Threshold:          ${THRESHOLD}" \
+    "  DETR threshold:     ${DETR_THRESHOLD}" \
+    "  Refs per logo:      ${REFS_PER_LOGO}" \
+    "  Margin:             ${MARGIN}" \
+    "  Positive samples:   ${POSITIVE_SAMPLES}" \
+    "  Negative samples:   ${NEGATIVE_SAMPLES}" \
+    "" "Models:" \
+    "  Baseline:           ${BASELINE_MODEL}" \
+    "  Fine-tuned:         ${FINETUNED_MODEL}" \
+    "" "Output:" \
+    "  Baseline results:   ${BASELINE_OUTPUT}" \
+    "  Fine-tuned results: ${FINETUNED_OUTPUT}" \
+    "  Summary:            ${SUMMARY_OUTPUT}" \
+    ""
+
+# Options handed unchanged to both test runs, so the two models are evaluated with the same settings.
+# Appended one option at a time; the resulting argument order matches a single literal list.
+TEST_ARGS=()
+TEST_ARGS+=(-n "${NUM_LOGOS}")
+TEST_ARGS+=(-s "${SEED}")
+TEST_ARGS+=(-t "${THRESHOLD}")
+TEST_ARGS+=(-d "${DETR_THRESHOLD}")
+TEST_ARGS+=(--refs-per-logo "${REFS_PER_LOGO}")
+TEST_ARGS+=(--margin "${MARGIN}")
+TEST_ARGS+=(--positive-samples "${POSITIVE_SAMPLES}")
+TEST_ARGS+=(--negative-samples "${NEGATIVE_SAMPLES}")
+TEST_ARGS+=(--matching-method multi-ref)
+TEST_ARGS+=(--clear-cache)
+
+# Run baseline test, mirroring its combined stdout/stderr to the console and to ${BASELINE_OUTPUT}.
+echo "============================================================"
+echo "Testing BASELINE model: ${BASELINE_MODEL}"
+echo "============================================================"
+echo ""
+# Without pipefail, `set -e` only sees tee's exit status, so the test command's own status is checked via PIPESTATUS.
+uv run python test_logo_detection.py \
+    "${TEST_ARGS[@]}" \
+    -e "${BASELINE_MODEL}" \
+    2>&1 | tee "${BASELINE_OUTPUT}"
+[[ ${PIPESTATUS[0]} -eq 0 ]] || { echo "Error: baseline test run failed; output kept in ${BASELINE_OUTPUT}" >&2; exit 1; }
+echo ""
+echo "Baseline results saved to: ${BASELINE_OUTPUT}"
+echo ""
+
+# Run fine-tuned test, mirroring its combined stdout/stderr to the console and to ${FINETUNED_OUTPUT}.
+echo "============================================================"
+echo "Testing FINE-TUNED model: ${FINETUNED_MODEL}"
+echo "============================================================"
+echo ""
+# Without pipefail, `set -e` only sees tee's exit status, so the test command's own status is checked via PIPESTATUS.
+uv run python test_logo_detection.py \
+    "${TEST_ARGS[@]}" \
+    -e "${FINETUNED_MODEL}" \
+    2>&1 | tee "${FINETUNED_OUTPUT}"
+[[ ${PIPESTATUS[0]} -eq 0 ]] || { echo "Error: fine-tuned test run failed; output kept in ${FINETUNED_OUTPUT}" >&2; exit 1; }
+echo ""
+echo "Fine-tuned results saved to: ${FINETUNED_OUTPUT}"
+echo ""
+
+# Print the summary header and each model's key metric lines; one tee mirrors the whole section (title included) into a fresh summary file.
+{
+    echo "============================================================"
+    echo "COMPARISON SUMMARY"
+    echo "============================================================"
+    echo ""
+    echo "Test Parameters:"
+    echo "  Logos: ${NUM_LOGOS}, Seed: ${SEED}, Threshold: ${THRESHOLD}"
+    echo "  Method: multi-ref, Refs/logo: ${REFS_PER_LOGO}, Margin: ${MARGIN}"
+    echo ""
+    echo "BASELINE (${BASELINE_MODEL}):"
+    grep -E "(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)" "${BASELINE_OUTPUT}" | head -6
+    echo ""
+    echo "FINE-TUNED (${FINETUNED_MODEL}):"
+    grep -E "(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)" "${FINETUNED_OUTPUT}" | head -6
+    echo ""
+} | tee "${SUMMARY_OUTPUT}"
+
+# Extract F1 scores for quick comparison. Each pipeline ends in `head`, which exits 0 even when nothing matched,
+# so an `|| echo` fallback can never fire; the "N/A" default is applied where the value is expanded instead.
+BASELINE_F1=$(grep "F1 Score" "${BASELINE_OUTPUT}" | head -1 | grep -oE "[0-9]+\.[0-9]+%" | head -1)
+FINETUNED_F1=$(grep "F1 Score" "${FINETUNED_OUTPUT}" | head -1 | grep -oE "[0-9]+\.[0-9]+%" | head -1)
+echo "------------------------------------------------------------" | tee -a "${SUMMARY_OUTPUT}"
+echo "F1 SCORE COMPARISON:" | tee -a "${SUMMARY_OUTPUT}"
+echo "  Baseline:    ${BASELINE_F1:-N/A}" | tee -a "${SUMMARY_OUTPUT}"
+echo "  Fine-tuned:  ${FINETUNED_F1:-N/A}" | tee -a "${SUMMARY_OUTPUT}"
+echo "------------------------------------------------------------" | tee -a "${SUMMARY_OUTPUT}"
+echo "" | tee -a "${SUMMARY_OUTPUT}"
+echo "Full results saved to: ${OUTPUT_DIR}/" | tee -a "${SUMMARY_OUTPUT}"
+echo ""
+echo "Done!"