#!/bin/bash
#
# Find optimal similarity threshold for logo detection.
#
# Runs test_logo_detection.py once per candidate threshold, extracts
# TP/FP/FN/precision/recall/F1 from its output, prints a results table
# (also saved under threshold_analysis/), and reports the threshold with
# the highest F1.
#
# Usage:
#   ./find_optimal_threshold.sh
#   ./find_optimal_threshold.sh --model finetuned
#   ./find_optimal_threshold.sh --model baseline
#   ./find_optimal_threshold.sh --thresholds "0.70 0.75 0.80 0.85"
#
# Environment variables supply the defaults; a command-line flag, where one
# exists, takes precedence:
#   NUM_LOGOS, SEED, REFS_PER_LOGO, MARGIN, MODEL, USE_MAX_SIM, THRESHOLDS
#
# Provenance: reconstructed from the patch "Add threshold optimization script"
# (commit 32bfefc, Rick McEwen, 2026-01-05).
#

set -euo pipefail

# Default parameters
NUM_LOGOS="${NUM_LOGOS:-50}"
SEED="${SEED:-42}"
REFS_PER_LOGO="${REFS_PER_LOGO:-3}"
MARGIN="${MARGIN:-0.05}"
MODEL="${MODEL:-finetuned}"
USE_MAX_SIM="${USE_MAX_SIM:-false}"

# Default thresholds to test
THRESHOLDS="${THRESHOLDS:-0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86}"

# Model paths
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Output
OUTPUT_DIR="threshold_analysis"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Horizontal rules used in the report
readonly RULE_HEAVY="============================================================"
readonly RULE_LIGHT="--------------------------------------------------------------------"

# Filled in by main()
MODEL_PATH=""
OUTPUT_FILE=""
STDERR_LOG=""

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: message words
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Print the help text to stdout.
#######################################
usage() {
  cat <<EOF
Usage: $0 [OPTIONS]

Options:
  -n, --num-logos NUM        Number of logos to test (default: 50)
  -s, --seed SEED            Random seed (default: 42)
  --model MODEL              Which model: 'baseline' or 'finetuned' (default: finetuned)
  --thresholds "T1 T2 ..."   Space-separated thresholds to test
  --finetuned-path PATH      Path to fine-tuned model
  --use-max-similarity       Use max instead of mean for multi-ref aggregation
  -h, --help                 Show this help message
EOF
}

#######################################
# Abort with a clear message when a value-taking option is the last word
# on the command line (otherwise `shift 2` would fail silently under -e).
# Arguments: $1 - option name, $2 - number of remaining arguments
#######################################
require_value() {
  (( $2 >= 2 )) || die "Error: option $1 requires an argument"
}

#######################################
# Parse command-line options into the global parameter variables.
# Globals:   NUM_LOGOS, SEED, MODEL, THRESHOLDS, FINETUNED_MODEL,
#            USE_MAX_SIM (written)
# Arguments: the script's positional parameters
#######################################
parse_args() {
  while (( $# > 0 )); do
    case "$1" in
      -n|--num-logos)
        require_value "$1" "$#"
        NUM_LOGOS="$2"
        shift 2
        ;;
      -s|--seed)
        require_value "$1" "$#"
        SEED="$2"
        shift 2
        ;;
      --model)
        require_value "$1" "$#"
        MODEL="$2"
        shift 2
        ;;
      --thresholds)
        require_value "$1" "$#"
        THRESHOLDS="$2"
        shift 2
        ;;
      --finetuned-path)
        require_value "$1" "$#"
        FINETUNED_MODEL="$2"
        shift 2
        ;;
      --use-max-similarity)
        USE_MAX_SIM="true"
        shift
        ;;
      -h|--help)
        usage
        exit 0
        ;;
      *)
        die "Unknown option: $1"
        ;;
    esac
  done
}

#######################################
# Resolve MODEL to the model identifier/path handed to the test script.
# 'baseline' selects BASELINE_MODEL; anything else selects FINETUNED_MODEL.
# The directory check applies only to the literal value 'finetuned'; other
# values keep working as before but now produce a warning instead of
# falling back silently.
# Globals:   MODEL, BASELINE_MODEL, FINETUNED_MODEL (read); MODEL_PATH (written)
#######################################
select_model() {
  if [[ "${MODEL}" == "baseline" ]]; then
    MODEL_PATH="${BASELINE_MODEL}"
    return 0
  fi

  if [[ "${MODEL}" == "finetuned" ]]; then
    [[ -d "${FINETUNED_MODEL}" ]] \
      || die "Error: Fine-tuned model not found at ${FINETUNED_MODEL}"
  else
    printf "Warning: unrecognized model '%s'; using the fine-tuned model path %s\n" \
      "${MODEL}" "${FINETUNED_MODEL}" >&2
  fi
  MODEL_PATH="${FINETUNED_MODEL}"
}

#######################################
# Write one line to stdout and append it to the results file.
# Globals:   OUTPUT_FILE (read)
# Arguments: text of the line
#######################################
emit() {
  printf '%s\n' "$*" | tee -a "${OUTPUT_FILE}"
}

#######################################
# Write one seven-column table row to stdout and the results file.
# Arguments: threshold, TP, FP, FN, precision, recall, F1
#######################################
emit_row() {
  printf '%-10s %8s %8s %8s %8s %8s %8s\n' "$@" | tee -a "${OUTPUT_FILE}"
}

#######################################
# Run the detection test for one threshold.
# The command is assembled as an array so every argument stays one word.
# Globals:   NUM_LOGOS, SEED, REFS_PER_LOGO, MARGIN, MODEL_PATH,
#            USE_MAX_SIM, STDERR_LOG (read)
# Arguments: $1 - threshold
# Outputs:   the test script's stdout; its stderr goes to STDERR_LOG
# Returns:   the test command's exit status
#######################################
run_detection() {
  local threshold=$1
  local -a cmd=(
    uv run python test_logo_detection.py
    -n "${NUM_LOGOS}"
    -s "${SEED}"
    -t "${threshold}"
    --refs-per-logo "${REFS_PER_LOGO}"
    --margin "${MARGIN}"
    --matching-method multi-ref
    -e "${MODEL_PATH}"
  )
  if [[ "${USE_MAX_SIM}" == "true" ]]; then
    cmd+=(--use-max-similarity)
  fi
  "${cmd[@]}" 2>"${STDERR_LOG}"
}

#######################################
# Print the first match of a regex found on lines containing a label.
# Prints nothing (and still succeeds) when there is no match, so a missing
# metric cannot abort the script under `set -e -o pipefail`.
# Arguments: $1 - literal label, $2 - extended regex, $3 - text to search
#######################################
extract_metric() {
  local label=$1 pattern=$2 text=$3
  {
    grep -F -- "${label}" <<<"${text}" \
      | grep -oE -- "${pattern}" \
      | head -n 1
  } || true
}

#######################################
# Floating-point "greater than" using awk (no dependency on bc).
# Arguments: $1, $2 - numbers
# Returns:   0 if $1 > $2, non-zero otherwise
#######################################
float_gt() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'
}

#######################################
# Remove the temporary stderr capture file, if one was created.
#######################################
cleanup() {
  if [[ -n "${STDERR_LOG}" ]]; then
    rm -f -- "${STDERR_LOG}"
  fi
}

#######################################
# Entry point: parse options, run every threshold, report the best F1.
# Arguments: the script's positional parameters
#######################################
main() {
  parse_args "$@"
  select_model

  # Split the threshold list on whitespace without glob expansion.
  local -a thresholds=()
  read -r -d '' -a thresholds <<<"${THRESHOLDS}" || true
  (( ${#thresholds[@]} > 0 )) || die "Error: no thresholds to test"

  # Create output directory and start with an empty results file.
  mkdir -p "${OUTPUT_DIR}"
  OUTPUT_FILE="${OUTPUT_DIR}/${MODEL}_thresholds_${TIMESTAMP}.txt"
  : >"${OUTPUT_FILE}"

  # stderr of each run is kept out of the table but shown if the run fails.
  STDERR_LOG=$(mktemp "${TMPDIR:-/tmp}/find_optimal_threshold.XXXXXX")
  trap cleanup EXIT

  echo "${RULE_HEAVY}"
  echo "THRESHOLD OPTIMIZATION"
  echo "${RULE_HEAVY}"
  echo ""
  echo "Model: ${MODEL} (${MODEL_PATH})"
  echo "Thresholds: ${THRESHOLDS}"
  echo "Logos: ${NUM_LOGOS}"
  echo "Seed: ${SEED}"
  echo "Max sim: ${USE_MAX_SIM}"
  echo "Output: ${OUTPUT_FILE}"
  echo ""

  # Header for results
  emit "${RULE_HEAVY}"
  emit "THRESHOLD OPTIMIZATION RESULTS"
  emit "Model: ${MODEL} (${MODEL_PATH})"
  emit "${RULE_HEAVY}"
  emit ""
  emit_row "Threshold" "TP" "FP" "FN" "Prec" "Recall" "F1"
  emit "${RULE_LIGHT}"

  # Track best F1 (strict '>' below means the first of tied thresholds wins)
  local best_f1="" best_f1_num=0 best_threshold=""
  local threshold output tp fp fn prec recall f1 f1_num

  for threshold in "${thresholds[@]}"; do
    if ! output=$(run_detection "${threshold}"); then
      {
        printf 'Error: test_logo_detection.py failed for threshold %s\n' \
          "${threshold}"
        if [[ -s "${STDERR_LOG}" ]]; then
          printf '%s\n' '--- captured stderr ---'
          cat -- "${STDERR_LOG}"
        fi
      } >&2
      exit 1
    fi

    # Extract metrics
    tp=$(extract_metric "True Positives" '[0-9]+' "${output}")
    fp=$(extract_metric "False Positives" '[0-9]+' "${output}")
    fn=$(extract_metric "False Negatives" '[0-9]+' "${output}")
    prec=$(extract_metric "Precision:" '[0-9]+\.[0-9]+%' "${output}")
    recall=$(extract_metric "Recall:" '[0-9]+\.[0-9]+%' "${output}")
    f1=$(extract_metric "F1 Score:" '[0-9]+\.[0-9]+%' "${output}")

    # Print row; metrics that could not be parsed are shown as n/a.
    emit_row "${threshold}" "${tp:-n/a}" "${fp:-n/a}" "${fn:-n/a}" \
      "${prec:-n/a}" "${recall:-n/a}" "${f1:-n/a}"

    f1_num=${f1%\%}
    if [[ -z "${f1_num}" ]]; then
      printf 'Warning: no F1 score parsed for threshold %s; ignoring it when picking the best\n' \
        "${threshold}" >&2
      continue
    fi
    if float_gt "${f1_num}" "${best_f1_num}"; then
      best_f1="${f1}"
      best_f1_num="${f1_num}"
      best_threshold="${threshold}"
    fi
  done

  emit "${RULE_LIGHT}"
  emit ""
  if [[ -n "${best_threshold}" ]]; then
    emit "BEST THRESHOLD: ${best_threshold} (F1 = ${best_f1})"
  else
    emit "BEST THRESHOLD: n/a (no threshold produced an F1 above 0)"
  fi
  emit ""
  echo "Results saved to: ${OUTPUT_FILE}"
}

main "$@"