Add script to compare fine-tuned vs baseline CLIP
This commit is contained in:
191
compare_finetuned_vs_baseline.sh
Executable file
191
compare_finetuned_vs_baseline.sh
Executable file
@ -0,0 +1,191 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# Compare fine-tuned CLIP model against baseline CLIP for logo recognition.
|
||||
#
|
||||
# This script runs the same test suite on both models and outputs results
|
||||
# for easy comparison.
|
||||
#
|
||||
# Usage:
|
||||
# ./compare_finetuned_vs_baseline.sh
|
||||
# ./compare_finetuned_vs_baseline.sh --num-logos 100
|
||||
#
|
||||
|
||||
# Abort the script as soon as any command exits with a non-zero status.
set -o errexit

# Tunable parameters. A non-empty value already present in the environment
# wins; otherwise the default written here is assigned.
: "${NUM_LOGOS:=50}"
: "${SEED:=42}"
: "${THRESHOLD:=0.7}"
: "${DETR_THRESHOLD:=0.5}"
: "${REFS_PER_LOGO:=3}"
: "${MARGIN:=0.05}"
: "${POSITIVE_SAMPLES:=5}"
: "${NEGATIVE_SAMPLES:=20}"

# Models under comparison. The fine-tuned entry is later checked to be an
# existing local directory; both values are handed to the test script via -e.
BASELINE_MODEL='openai/clip-vit-large-patch14'
FINETUNED_MODEL='models/logo_detection/clip_finetuned'

# Result files. All three share one timestamp so the logs and the summary
# produced by a single invocation can be matched up afterwards.
OUTPUT_DIR='comparison_results'
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
BASELINE_OUTPUT="${OUTPUT_DIR}/baseline_${TIMESTAMP}.txt"
FINETUNED_OUTPUT="${OUTPUT_DIR}/finetuned_${TIMESTAMP}.txt"
SUMMARY_OUTPUT="${OUTPUT_DIR}/comparison_summary_${TIMESTAMP}.txt"
|
||||
|
||||
# Parse command line arguments

# Print the help text to stdout.
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo ""
    echo "Options:"
    echo " -n, --num-logos NUM Number of logos to test (default: 50)"
    echo " -s, --seed SEED Random seed for reproducibility (default: 42)"
    echo " -t, --threshold VAL Similarity threshold (default: 0.7)"
    echo " --refs-per-logo NUM Reference images per logo (default: 3)"
    echo " --margin VAL Margin for matching (default: 0.05)"
    echo " --finetuned-model PATH Path to fine-tuned model"
    echo " -h, --help Show this help message"
    echo ""
    echo "Environment variables (no command line equivalent):"
    echo " DETR_THRESHOLD DETR threshold (default: 0.5)"
    echo " POSITIVE_SAMPLES Positive samples (default: 5)"
    echo " NEGATIVE_SAMPLES Negative samples (default: 20)"
}

# Exit with a diagnostic unless the option in $1 is followed by a value.
# Called with the remaining argument list, so $# counts option + value.
# Without this check a trailing option makes `shift 2` fail and, under
# `set -e`, the script dies without printing anything.
require_value() {
    if [[ $# -lt 2 ]]; then
        echo "Error: option $1 requires a value" >&2
        echo "Run '$0 --help' for usage." >&2
        exit 1
    fi
}

#######################################
# Apply command line options on top of the current settings.
# Globals written: NUM_LOGOS, SEED, THRESHOLD, REFS_PER_LOGO, MARGIN,
#                  FINETUNED_MODEL (only those whose option is given)
# Arguments: the script's argument list
# Exits:     0 after printing help; 1 on an unknown option or an option
#            that is missing its value
#######################################
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -n|--num-logos)
                require_value "$@"
                NUM_LOGOS="$2"
                shift 2
                ;;
            -s|--seed)
                require_value "$@"
                SEED="$2"
                shift 2
                ;;
            -t|--threshold)
                require_value "$@"
                THRESHOLD="$2"
                shift 2
                ;;
            --refs-per-logo)
                require_value "$@"
                REFS_PER_LOGO="$2"
                shift 2
                ;;
            --margin)
                require_value "$@"
                MARGIN="$2"
                shift 2
                ;;
            --finetuned-model)
                require_value "$@"
                FINETUNED_MODEL="$2"
                shift 2
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                # Diagnostics belong on stderr so they are not mixed into
                # captured or piped output.
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
|
||||
|
||||
# Check if fine-tuned model exists
# Done before anything is written, so an aborted invocation does not leave an
# empty results directory behind. Diagnostics go to stderr.
if [[ ! -d "${FINETUNED_MODEL}" ]]; then
    echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
    echo "Please train the model first using: uv run python train_clip_logo.py --config configs/jetson_orin.yaml" >&2
    exit 1
fi

# Create output directory
# The per-run log files and the summary are all written beneath it.
mkdir -p "${OUTPUT_DIR}"
|
||||
|
||||
# Show the effective configuration before any work starts: the parameters in
# force, the two models being compared and where the results will be written.
# One printf call emits every line; each argument becomes one output line.
printf '%s\n' \
    "============================================================" \
    "CLIP Logo Recognition: Fine-tuned vs Baseline Comparison" \
    "============================================================" \
    "" \
    "Parameters:" \
    " Number of logos: ${NUM_LOGOS}" \
    " Random seed: ${SEED}" \
    " Threshold: ${THRESHOLD}" \
    " DETR threshold: ${DETR_THRESHOLD}" \
    " Refs per logo: ${REFS_PER_LOGO}" \
    " Margin: ${MARGIN}" \
    " Positive samples: ${POSITIVE_SAMPLES}" \
    " Negative samples: ${NEGATIVE_SAMPLES}" \
    "" \
    "Models:" \
    " Baseline: ${BASELINE_MODEL}" \
    " Fine-tuned: ${FINETUNED_MODEL}" \
    "" \
    "Output:" \
    " Baseline results: ${BASELINE_OUTPUT}" \
    " Fine-tuned results: ${FINETUNED_OUTPUT}" \
    " Summary: ${SUMMARY_OUTPUT}" \
    ""
|
||||
|
||||
# Arguments shared by both test runs. Kept in an array so each value stays a
# single word when expanded, and so both models are evaluated with exactly
# the same settings; only the -e model argument differs per run.
TEST_ARGS=()
TEST_ARGS+=(-n "${NUM_LOGOS}")
TEST_ARGS+=(-s "${SEED}")
TEST_ARGS+=(-t "${THRESHOLD}")
TEST_ARGS+=(-d "${DETR_THRESHOLD}")
TEST_ARGS+=(--refs-per-logo "${REFS_PER_LOGO}")
TEST_ARGS+=(--margin "${MARGIN}")
TEST_ARGS+=(--positive-samples "${POSITIVE_SAMPLES}")
TEST_ARGS+=(--negative-samples "${NEGATIVE_SAMPLES}")
TEST_ARGS+=(--matching-method multi-ref)
TEST_ARGS+=(--clear-cache)
|
||||
|
||||
# Run baseline test
echo "============================================================"
echo "Testing BASELINE model: ${BASELINE_MODEL}"
echo "============================================================"
echo ""

# stderr is merged into stdout so the saved log holds the complete output
# while it is still shown live on the terminal.
uv run python test_logo_detection.py \
    "${TEST_ARGS[@]}" \
    -e "${BASELINE_MODEL}" \
    2>&1 | tee "${BASELINE_OUTPUT}"
# A pipeline reports only the status of its last stage (tee), so `set -e`
# never notices a failed test run. PIPESTATUS must be read immediately, before
# another command overwrites it.
baseline_status="${PIPESTATUS[0]}"
if [[ "${baseline_status}" -ne 0 ]]; then
    echo "Error: baseline test run failed with exit code ${baseline_status} (log: ${BASELINE_OUTPUT})" >&2
    exit "${baseline_status}"
fi

echo ""
echo "Baseline results saved to: ${BASELINE_OUTPUT}"
echo ""
|
||||
|
||||
# Run fine-tuned test
echo "============================================================"
echo "Testing FINE-TUNED model: ${FINETUNED_MODEL}"
echo "============================================================"
echo ""

# stderr is merged into stdout so the saved log holds the complete output
# while it is still shown live on the terminal.
uv run python test_logo_detection.py \
    "${TEST_ARGS[@]}" \
    -e "${FINETUNED_MODEL}" \
    2>&1 | tee "${FINETUNED_OUTPUT}"
# A pipeline reports only the status of its last stage (tee), so `set -e`
# never notices a failed test run. PIPESTATUS must be read immediately, before
# another command overwrites it.
finetuned_status="${PIPESTATUS[0]}"
if [[ "${finetuned_status}" -ne 0 ]]; then
    echo "Error: fine-tuned test run failed with exit code ${finetuned_status} (log: ${FINETUNED_OUTPUT})" >&2
    exit "${finetuned_status}"
fi

echo ""
echo "Fine-tuned results saved to: ${FINETUNED_OUTPUT}"
echo ""
|
||||
|
||||
# Extract and compare key metrics

#######################################
# Print the F1 percentage recorded in a results file.
# Arguments: $1 - path to a saved test log
# Outputs:   the first "<digits>.<digits>%" token on the first line
#            containing "F1 Score", or "N/A" when there is none
#######################################
extract_f1() {
    local results_file="$1"
    local score
    # The pipeline's status is that of the final `head`, which succeeds even
    # when nothing matched, so a fallback chained with `||` never fires. Test
    # the captured text instead. `|| true` keeps this safe should pipefail
    # ever be enabled.
    score=$(grep "F1 Score" "${results_file}" \
        | head -1 \
        | grep -oE "[0-9]+\.[0-9]+%" \
        | head -1) || true
    printf '%s\n' "${score:-N/A}"
}

#######################################
# Print up to six metric lines from a results file.
# Arguments: $1 - path to a saved test log
# Outputs:   the first six lines mentioning one of the metric labels
#######################################
print_key_metrics() {
    # No matching line is not an error: the section is simply left empty.
    grep -E "(Precision|Recall|F1 Score|True Positives|False Positives|False Negatives)" "$1" \
        | head -6 || true
}

# Extract F1 scores for quick comparison
BASELINE_F1=$(extract_f1 "${BASELINE_OUTPUT}")
FINETUNED_F1=$(extract_f1 "${FINETUNED_OUTPUT}")

# The whole report goes through a single tee, so the summary file receives
# exactly what is shown on screen, including the title block.
{
    echo "============================================================"
    echo "COMPARISON SUMMARY"
    echo "============================================================"
    echo ""
    echo "Test Parameters:"
    echo " Logos: ${NUM_LOGOS}, Seed: ${SEED}, Threshold: ${THRESHOLD}"
    echo " Method: multi-ref, Refs/logo: ${REFS_PER_LOGO}, Margin: ${MARGIN}"
    echo ""

    echo "BASELINE (${BASELINE_MODEL}):"
    print_key_metrics "${BASELINE_OUTPUT}"
    echo ""

    echo "FINE-TUNED (${FINETUNED_MODEL}):"
    print_key_metrics "${FINETUNED_OUTPUT}"
    echo ""

    echo "------------------------------------------------------------"
    echo "F1 SCORE COMPARISON:"
    echo " Baseline: ${BASELINE_F1}"
    echo " Fine-tuned: ${FINETUNED_F1}"
    echo "------------------------------------------------------------"
    echo ""
    echo "Full results saved to: ${OUTPUT_DIR}/"
} | tee "${SUMMARY_OUTPUT}"

echo ""
echo "Done!"
|
||||
Reference in New Issue
Block a user