#!/bin/bash
#
# Find optimal similarity threshold for logo detection.
#
# Runs test_logo_detection.py (via `uv run python`) once per threshold,
# extracts TP/FP/FN/precision/recall/F1 from its output, prints them as a
# table (also written to a timestamped file under threshold_analysis/), and
# reports the threshold with the highest F1 score. Ties keep the earliest
# threshold in the list.
#
# Usage:
#   ./find_optimal_threshold.sh
#   ./find_optimal_threshold.sh --model finetuned
#   ./find_optimal_threshold.sh --model baseline
#   ./find_optimal_threshold.sh --thresholds "0.70 0.75 0.80 0.85"
#
# Environment variables (used as defaults when set and non-empty):
#   NUM_LOGOS, SEED, REFS_PER_LOGO, MARGIN, MODEL, USE_MAX_SIM, THRESHOLDS
#

set -euo pipefail

# Default parameters
NUM_LOGOS="${NUM_LOGOS:-50}"
SEED="${SEED:-42}"
REFS_PER_LOGO="${REFS_PER_LOGO:-3}"
MARGIN="${MARGIN:-0.05}"
MODEL="${MODEL:-finetuned}"
USE_MAX_SIM="${USE_MAX_SIM:-false}"

# Default thresholds to test
THRESHOLDS="${THRESHOLDS:-0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86}"

# Model paths
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Output
OUTPUT_DIR="threshold_analysis"
TIMESTAMP=$(date +%Y%m%d_%H%M%S)

# Rules used to frame the banner and the results table.
readonly HEAVY_RULE="============================================================"
readonly LIGHT_RULE="--------------------------------------------------------------------"
readonly ROW_FORMAT="%-10s %8s %8s %8s %8s %8s %8s\n"
# An F1 value (with the trailing '%' removed) must look like this to be compared.
readonly NUMBER_RE='^[0-9]+(\.[0-9]+)?$'

# Print a message to stderr and abort with status 1.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Print the help text to stdout.
usage() {
  echo "Usage: $0 [OPTIONS]"
  echo ""
  echo "Options:"
  echo " -n, --num-logos NUM Number of logos to test (default: 50)"
  echo " -s, --seed SEED Random seed (default: 42)"
  echo " --model MODEL Which model: 'baseline' or 'finetuned' (default: finetuned)"
  echo " --thresholds \"T1 T2 ...\" Space-separated thresholds to test"
  echo " --finetuned-path PATH Path to fine-tuned model"
  echo " --use-max-similarity Use max instead of mean for multi-ref aggregation"
  echo " -h, --help Show this help message"
}

# Abort unless the option named $1 is followed by a value.
# $2 is the number of arguments still to be parsed (including the option).
require_value() {
  (( $2 >= 2 )) || die "Error: option $1 requires a value"
}

# Print one line to stdout and append it to the results file.
report() {
  printf '%s\n' "$*" | tee -a "${OUTPUT_FILE}"
}

# Print the first match of regex $2 found on the lines of $3 containing the
# literal text $1. Prints nothing when there is no match.
extract_metric() {
  local label=$1 pattern=$2 text=$3
  # grep exits non-zero on "no match"; with pipefail that must not abort the
  # script, so the failure is swallowed and the caller sees an empty string.
  printf '%s\n' "${text}" \
    | grep -F -- "${label}" \
    | grep -oE -- "${pattern}" \
    | head -n 1 || true
}

# Parse command line arguments
while [[ $# -gt 0 ]]; do
  case "$1" in
    -n|--num-logos)
      require_value "$1" "$#"
      NUM_LOGOS="$2"
      shift 2
      ;;
    -s|--seed)
      require_value "$1" "$#"
      SEED="$2"
      shift 2
      ;;
    --model)
      require_value "$1" "$#"
      MODEL="$2"
      shift 2
      ;;
    --thresholds)
      require_value "$1" "$#"
      THRESHOLDS="$2"
      shift 2
      ;;
    --finetuned-path)
      require_value "$1" "$#"
      FINETUNED_MODEL="$2"
      shift 2
      ;;
    --use-max-similarity)
      USE_MAX_SIM="true"
      shift
      ;;
    -h|--help)
      usage
      exit 0
      ;;
    *)
      die "Unknown option: $1"
      ;;
  esac
done

# Split the threshold list on whitespace without glob expansion.
# `read -d ''` returns non-zero at end of input even though the array has
# been filled, hence the `|| true`.
THRESHOLD_LIST=()
read -r -d '' -a THRESHOLD_LIST <<< "${THRESHOLDS}" || true
(( ${#THRESHOLD_LIST[@]} > 0 )) || die "Error: no thresholds to test"

# Select model path. Any value other than 'baseline' selects the fine-tuned
# path; the directory check only applies to the exact value 'finetuned'.
case "${MODEL}" in
  baseline)
    MODEL_PATH="${BASELINE_MODEL}"
    ;;
  finetuned)
    MODEL_PATH="${FINETUNED_MODEL}"
    # Check if fine-tuned model exists
    [[ -d "${FINETUNED_MODEL}" ]] \
      || die "Error: Fine-tuned model not found at ${FINETUNED_MODEL}"
    ;;
  *)
    printf "Warning: unrecognized model '%s'; using fine-tuned model path %s\n" \
      "${MODEL}" "${FINETUNED_MODEL}" >&2
    MODEL_PATH="${FINETUNED_MODEL}"
    ;;
esac

# Create output directory
mkdir -p "${OUTPUT_DIR}"
OUTPUT_FILE="${OUTPUT_DIR}/${MODEL}_thresholds_${TIMESTAMP}.txt"

# stderr of each test run is kept here so it can be shown if the run fails,
# while staying out of the results table on success.
ERR_LOG=$(mktemp)
trap 'rm -f -- "${ERR_LOG}"' EXIT

echo "${HEAVY_RULE}"
echo "THRESHOLD OPTIMIZATION"
echo "${HEAVY_RULE}"
echo ""
echo "Model: ${MODEL} (${MODEL_PATH})"
echo "Thresholds: ${THRESHOLDS}"
echo "Logos: ${NUM_LOGOS}"
echo "Seed: ${SEED}"
echo "Max sim: ${USE_MAX_SIM}"
echo "Output: ${OUTPUT_FILE}"
echo ""

# Header for results (start from an empty results file)
: > "${OUTPUT_FILE}"
report "${HEAVY_RULE}"
report "THRESHOLD OPTIMIZATION RESULTS"
report "Model: ${MODEL} (${MODEL_PATH})"
report "${HEAVY_RULE}"
report ""
# shellcheck disable=SC2059 # ROW_FORMAT is a constant format string
printf "${ROW_FORMAT}" "Threshold" "TP" "FP" "FN" "Prec" "Recall" "F1" \
  | tee -a "${OUTPUT_FILE}"
report "${LIGHT_RULE}"

# Track best F1. BEST_THRESHOLD stays empty until a run yields a parsable F1.
BEST_F1=0
BEST_THRESHOLD=""

# The command line is identical for every threshold except for `-t`, so the
# fixed parts are built once as arrays (no word-splitting of option values).
CMD_PREFIX=(uv run python test_logo_detection.py -n "${NUM_LOGOS}" -s "${SEED}")
CMD_SUFFIX=(
  --refs-per-logo "${REFS_PER_LOGO}"
  --margin "${MARGIN}"
  --matching-method multi-ref
  -e "${MODEL_PATH}"
)
if [[ "${USE_MAX_SIM}" == "true" ]]; then
  CMD_SUFFIX+=(--use-max-similarity)
fi

# Test each threshold
for THRESHOLD in "${THRESHOLD_LIST[@]}"; do
  # Run test and capture output; report the failure instead of dying silently.
  STATUS=0
  OUTPUT=$("${CMD_PREFIX[@]}" -t "${THRESHOLD}" "${CMD_SUFFIX[@]}" 2>"${ERR_LOG}") \
    || STATUS=$?
  if (( STATUS != 0 )); then
    {
      printf 'Error: test run failed for threshold %s (exit status %d)\n' \
        "${THRESHOLD}" "${STATUS}"
      cat -- "${ERR_LOG}"
    } >&2
    exit "${STATUS}"
  fi

  # Extract metrics
  TP=$(extract_metric "True Positives" '[0-9]+' "${OUTPUT}")
  FP=$(extract_metric "False Positives" '[0-9]+' "${OUTPUT}")
  FN=$(extract_metric "False Negatives" '[0-9]+' "${OUTPUT}")
  PREC=$(extract_metric "Precision:" '[0-9]+\.[0-9]+%' "${OUTPUT}")
  RECALL=$(extract_metric "Recall:" '[0-9]+\.[0-9]+%' "${OUTPUT}")
  F1=$(extract_metric "F1 Score:" '[0-9]+\.[0-9]+%' "${OUTPUT}")

  # Print row
  # shellcheck disable=SC2059 # ROW_FORMAT is a constant format string
  printf "${ROW_FORMAT}" "${THRESHOLD}" "${TP}" "${FP}" "${FN}" "${PREC}" "${RECALL}" "${F1}" \
    | tee -a "${OUTPUT_FILE}"

  # Track best F1
  F1_NUM="${F1%\%}"
  if [[ "${F1_NUM}" =~ ${NUMBER_RE} ]]; then
    # The first parsable F1 always becomes the best so far; afterwards only a
    # strictly larger value replaces it. awk does the floating-point compare.
    if [[ -z "${BEST_THRESHOLD}" ]] \
      || awk -v cur="${F1_NUM}" -v best="${BEST_F1%\%}" \
        'BEGIN { exit !((cur + 0) > (best + 0)) }'; then
      BEST_F1="${F1}"
      BEST_THRESHOLD="${THRESHOLD}"
    fi
  else
    printf 'Warning: could not parse an F1 score for threshold %s; ignoring it when picking the best\n' \
      "${THRESHOLD}" >&2
  fi
done

report "${LIGHT_RULE}"
report ""
report "BEST THRESHOLD: ${BEST_THRESHOLD} (F1 = ${BEST_F1})"
report ""
echo "Results saved to: ${OUTPUT_FILE}"