Add threshold optimization script

- Test a range of thresholds to find the optimal F1
- Support both baseline and fine-tuned models
- Option for max vs. mean similarity aggregation
- Output a results table with TP/FP/FN/precision/recall/F1
2026-01-05 14:20:27 -05:00
parent f74d4b6981
commit 32bfefc022
1 changed files with 168 additions and 0 deletions
--- a/find_optimal_threshold.sh
+++ b/find_optimal_threshold.sh
@@ -0,0 +1,168 @@
 #!/bin/bash
 #
 # Find optimal similarity threshold for logo detection.
 #
 # Tests a range of thresholds and outputs precision/recall/F1 for each.
 #
 # Usage:
 #   ./find_optimal_threshold.sh
 #   ./find_optimal_threshold.sh --model finetuned
 #   ./find_optimal_threshold.sh --model baseline
 #   ./find_optimal_threshold.sh --thresholds "0.70 0.75 0.80 0.85"
 #
 # Stop at the first command that exits non-zero.
 set -e
 # Tunables: a value already present in the environment wins; otherwise the
 # default written here is assigned.
 : "${NUM_LOGOS:=50}"
 : "${SEED:=42}"
 : "${REFS_PER_LOGO:=3}"
 : "${MARGIN:=0.05}"
 : "${MODEL:=finetuned}"
 : "${USE_MAX_SIM:=false}"
 # Thresholds to sweep, stored as a single space-separated string.
 : "${THRESHOLDS:=0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86}"
 # Baseline and fine-tuned model settings.
 BASELINE_MODEL="openai/clip-vit-large-patch14"
 FINETUNED_MODEL="models/logo_detection/clip_finetuned"
 # Results directory and a timestamp captured once at startup.
 OUTPUT_DIR="threshold_analysis"
 TIMESTAMP="$(date +%Y%m%d_%H%M%S)"
 # Parse command line arguments
 #
 # Recognised options overwrite NUM_LOGOS, SEED, MODEL, THRESHOLDS,
 # FINETUNED_MODEL and USE_MAX_SIM. -h/--help prints the usage text and exits 0;
 # an unknown option or an option missing its value exits 1 with a message on
 # stderr.

 # Print the option summary shown for -h/--help.
 print_usage() {
    printf '%s\n' \
        "Usage: $0 [OPTIONS]" \
        "" \
        "Options:" \
        "  -n, --num-logos NUM       Number of logos to test (default: 50)" \
        "  -s, --seed SEED           Random seed (default: 42)" \
        "  --model MODEL             Which model: 'baseline' or 'finetuned' (default: finetuned)" \
        "  --thresholds \"T1 T2 ...\"  Space-separated thresholds to test" \
        "  --finetuned-path PATH     Path to fine-tuned model" \
        "  --use-max-similarity      Use max instead of mean for multi-ref aggregation" \
        "  -h, --help                Show this help message"
 }

 # Exit with an error unless option $1 is followed by a value.
 # $2 is the number of arguments not yet consumed, the option itself included.
 # Without this check a trailing option would make `shift 2` fail, which ends
 # the script silently under `set -e`.
 require_value() {
    if (( $2 < 2 )); then
        echo "Error: option $1 requires a value" >&2
        exit 1
    fi
 }

 # Consume the given arguments left to right and apply each option.
 parse_args() {
    while (( $# > 0 )); do
        case "$1" in
            -n|--num-logos)
                require_value "$1" "$#"
                NUM_LOGOS="$2"
                shift 2
                ;;
            -s|--seed)
                require_value "$1" "$#"
                SEED="$2"
                shift 2
                ;;
            --model)
                require_value "$1" "$#"
                MODEL="$2"
                shift 2
                ;;
            --thresholds)
                require_value "$1" "$#"
                THRESHOLDS="$2"
                shift 2
                ;;
            --finetuned-path)
                require_value "$1" "$#"
                FINETUNED_MODEL="$2"
                shift 2
                ;;
            --use-max-similarity)
                USE_MAX_SIM="true"
                shift
                ;;
            -h|--help)
                print_usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
 }

 parse_args "$@"
 # Select model path
 #
 # Maps MODEL onto MODEL_PATH. Only 'baseline' and 'finetuned' are accepted;
 # any other value is rejected rather than silently treated as 'finetuned'
 # with the existence check skipped.
 case "${MODEL}" in
    baseline)
        MODEL_PATH="${BASELINE_MODEL}"
        ;;
    finetuned)
        # The fine-tuned model has to be present as a directory
        if [[ ! -d "${FINETUNED_MODEL}" ]]; then
            echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
            exit 1
        fi
        MODEL_PATH="${FINETUNED_MODEL}"
        ;;
    *)
        echo "Error: Unknown model '${MODEL}' (expected 'baseline' or 'finetuned')" >&2
        exit 1
        ;;
 esac
 # Ensure the results directory exists and derive this run's report path.
 mkdir -p "${OUTPUT_DIR}"
 OUTPUT_FILE="${OUTPUT_DIR}/${MODEL}_thresholds_${TIMESTAMP}.txt"
 rule="============================================================"
 # Console-only banner summarising the run configuration.
 printf '%s\n' \
    "${rule}" \
    "THRESHOLD OPTIMIZATION" \
    "${rule}" \
    "" \
    "Model:      ${MODEL} (${MODEL_PATH})" \
    "Thresholds: ${THRESHOLDS}" \
    "Logos:      ${NUM_LOGOS}" \
    "Seed:       ${SEED}" \
    "Max sim:    ${USE_MAX_SIM}" \
    "Output:     ${OUTPUT_FILE}" \
    ""
 # Report header: the first line truncates/creates the file, the rest append.
 # Every line goes to both the console and the file.
 printf '%s\n' "${rule}" | tee "${OUTPUT_FILE}"
 for header_line in \
    "THRESHOLD OPTIMIZATION RESULTS" \
    "Model: ${MODEL} (${MODEL_PATH})" \
    "${rule}" \
    ""; do
    printf '%s\n' "${header_line}" | tee -a "${OUTPUT_FILE}"
 done
 # Column titles; the per-threshold rows use the same field widths.
 printf "%-10s %8s %8s %8s %8s %8s %8s\n" "Threshold" "TP" "FP" "FN" "Prec" "Recall" "F1" | tee -a "${OUTPUT_FILE}"
 printf '%s\n' "--------------------------------------------------------------------" | tee -a "${OUTPUT_FILE}"
 # Track best F1
 BEST_F1=0
 BEST_THRESHOLD=""
 # Numeric best so far. It starts below zero so that the first threshold with
 # a parsed F1 is recorded even when that F1 is 0.00%.
 BEST_F1_NUM=-1
 # Build extra args (an array, so they expand as separate words without
 # relying on unquoted word splitting)
 EXTRA_ARGS=()
 if [[ "${USE_MAX_SIM}" == "true" ]]; then
    EXTRA_ARGS+=(--use-max-similarity)
 fi

 # extract_metric TEXT LABEL REGEX
 # Print the first match of extended regex REGEX taken from the lines of TEXT
 # that contain LABEL. Prints nothing (and still succeeds) when nothing matches.
 extract_metric() {
    grep -- "$2" <<<"$1" | grep -oE -- "$3" | head -n 1 || true
 }

 # is_greater A B
 # Succeed when A is numerically greater than B. Decimals are supported and an
 # empty operand counts as 0. Uses awk so the script does not depend on bc.
 is_greater() {
    awk -v a="$1" -v b="$2" 'BEGIN { exit !((a + 0) > (b + 0)) }'
 }

 # Test each threshold (THRESHOLDS is intentionally unquoted: it is a
 # space-separated list)
 for THRESHOLD in ${THRESHOLDS}; do
    # Run test and capture output. stderr is discarded, so a failure is
    # reported here instead of letting `set -e` end the script silently.
    if ! OUTPUT=$(uv run python test_logo_detection.py \
        -n "${NUM_LOGOS}" \
        -s "${SEED}" \
        -t "${THRESHOLD}" \
        --refs-per-logo "${REFS_PER_LOGO}" \
        --margin "${MARGIN}" \
        --matching-method multi-ref \
        -e "${MODEL_PATH}" \
        "${EXTRA_ARGS[@]}" \
        2>/dev/null); then
        echo "Error: test_logo_detection.py failed for threshold ${THRESHOLD} (its stderr is suppressed; run it directly to see the cause)" >&2
        exit 1
    fi
    # Extract metrics
    TP=$(extract_metric "${OUTPUT}" "True Positives" "[0-9]+")
    FP=$(extract_metric "${OUTPUT}" "False Positives" "[0-9]+")
    FN=$(extract_metric "${OUTPUT}" "False Negatives" "[0-9]+")
    PREC=$(extract_metric "${OUTPUT}" "Precision:" "[0-9]+\.[0-9]+%")
    RECALL=$(extract_metric "${OUTPUT}" "Recall:" "[0-9]+\.[0-9]+%")
    F1=$(extract_metric "${OUTPUT}" "F1 Score:" "[0-9]+\.[0-9]+%")
    # Print row
    printf "%-10s %8s %8s %8s %8s %8s %8s\n" "${THRESHOLD}" "${TP}" "${FP}" "${FN}" "${PREC}" "${RECALL}" "${F1}" | tee -a "${OUTPUT_FILE}"
    # Track best F1; a row whose F1 could not be parsed is skipped with a
    # warning rather than fed to the comparison as an empty operand.
    if [[ -z "${F1}" ]]; then
        echo "Warning: could not parse an F1 score for threshold ${THRESHOLD}" >&2
        continue
    fi
    F1_NUM="${F1%\%}"
    # Strict '>' keeps the earliest threshold when several tie.
    if is_greater "${F1_NUM}" "${BEST_F1_NUM}"; then
        BEST_F1="${F1}"
        BEST_F1_NUM="${F1_NUM}"
        BEST_THRESHOLD="${THRESHOLD}"
    fi
 done
 if [[ -z "${BEST_THRESHOLD}" ]]; then
    echo "Warning: no F1 score could be parsed for any threshold" >&2
 fi
 # Close the table and record the winning threshold, on screen and in the file.
 for summary_line in \
    "--------------------------------------------------------------------" \
    "" \
    "BEST THRESHOLD: ${BEST_THRESHOLD} (F1 = ${BEST_F1})" \
    ""; do
    printf '%s\n' "${summary_line}" | tee -a "${OUTPUT_FILE}"
 done
 # Console only: say where the report was written.
 printf '%s\n' "Results saved to: ${OUTPUT_FILE}"