Add threshold optimization script
- Test range of thresholds to find optimal F1
- Support both baseline and fine-tuned models
- Option for max vs mean similarity aggregation
- Output results table with TP/FP/FN/precision/recall/F1
This commit is contained in:
168
find_optimal_threshold.sh
Executable file
168
find_optimal_threshold.sh
Executable file
@ -0,0 +1,168 @@
|
|||||||
|
#!/bin/bash
|
||||||
|
#
|
||||||
|
# Find optimal similarity threshold for logo detection.
|
||||||
|
#
|
||||||
|
# Tests a range of thresholds and outputs precision/recall/F1 for each.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./find_optimal_threshold.sh
|
||||||
|
# ./find_optimal_threshold.sh --model finetuned
|
||||||
|
# ./find_optimal_threshold.sh --model baseline
|
||||||
|
# ./find_optimal_threshold.sh --thresholds "0.70 0.75 0.80 0.85"
|
||||||
|
#
|
||||||
|
|
||||||
|
set -e

# Tunable parameters. A non-empty value already present in the environment
# is kept; otherwise the default on the right-hand side is assigned.
: "${NUM_LOGOS:=50}"
: "${SEED:=42}"
: "${REFS_PER_LOGO:=3}"
: "${MARGIN:=0.05}"
: "${MODEL:=finetuned}"
: "${USE_MAX_SIM:=false}"

# Space-separated list of similarity thresholds to evaluate.
: "${THRESHOLDS:=0.70 0.72 0.74 0.76 0.78 0.80 0.82 0.84 0.86}"

# Model identifiers/paths for the two supported --model choices.
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Results directory, plus a timestamp (YYYYmmdd_HHMMSS) captured once at
# start-up.
OUTPUT_DIR="threshold_analysis"
TIMESTAMP="$(date '+%Y%m%d_%H%M%S')"
|
||||||
|
|
||||||
|
# Print the command-line help text to stdout.
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo ""
    echo "Options:"
    echo " -n, --num-logos NUM Number of logos to test (default: 50)"
    echo " -s, --seed SEED Random seed (default: 42)"
    echo " --model MODEL Which model: 'baseline' or 'finetuned' (default: finetuned)"
    echo " --thresholds \"T1 T2 ...\" Space-separated thresholds to test"
    echo " --finetuned-path PATH Path to fine-tuned model"
    echo " --use-max-similarity Use max instead of mean for multi-ref aggregation"
    echo " -h, --help Show this help message"
}

# Abort with a clear message when a value-taking option is the last word on
# the command line.
#   $1 - the option as typed by the user
#   $2 - number of arguments still unparsed, including the option itself
# Without this check `shift 2` fails and `set -e` ends the script silently.
require_value() {
    if [[ "$2" -lt 2 ]]; then
        echo "Error: option $1 requires a value" >&2
        exit 1
    fi
}

# Parse command line arguments.
# Writes the globals NUM_LOGOS, SEED, MODEL, THRESHOLDS, FINETUNED_MODEL and
# USE_MAX_SIM. Exits 0 after printing help, and exits 1 (message on stderr)
# for an unknown option or a missing option value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -n|--num-logos)
                require_value "$1" "$#"
                NUM_LOGOS="$2"
                shift 2
                ;;
            -s|--seed)
                require_value "$1" "$#"
                SEED="$2"
                shift 2
                ;;
            --model)
                require_value "$1" "$#"
                MODEL="$2"
                shift 2
                ;;
            --thresholds)
                require_value "$1" "$#"
                THRESHOLDS="$2"
                shift 2
                ;;
            --finetuned-path)
                require_value "$1" "$#"
                FINETUNED_MODEL="$2"
                shift 2
                ;;
            --use-max-similarity)
                USE_MAX_SIM="true"
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"
|
||||||
|
|
||||||
|
# Select model path.
# Reads MODEL, BASELINE_MODEL and FINETUNED_MODEL; sets MODEL_PATH.
# Any value other than the two documented choices is rejected. Previously an
# unrecognised value (e.g. a typo) silently selected the fine-tuned path and
# also skipped the existence check below.
case "${MODEL}" in
    baseline)
        MODEL_PATH="${BASELINE_MODEL}"
        ;;
    finetuned)
        MODEL_PATH="${FINETUNED_MODEL}"
        # The fine-tuned model is expected to be a local directory; fail
        # early if it is missing.
        if [[ ! -d "${FINETUNED_MODEL}" ]]; then
            echo "Error: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
            exit 1
        fi
        ;;
    *)
        echo "Error: unknown model '${MODEL}' (expected 'baseline' or 'finetuned')" >&2
        exit 1
        ;;
esac
|
||||||
|
|
||||||
|
# Make sure the results directory exists and derive this run's results file
# name from the model label and the start-up timestamp.
mkdir -p "${OUTPUT_DIR}"
OUTPUT_FILE="${OUTPUT_DIR}/${MODEL}_thresholds_${TIMESTAMP}.txt"

# Run configuration banner: console only, not written to the results file.
printf '%s\n' \
    "============================================================" \
    "THRESHOLD OPTIMIZATION" \
    "============================================================" \
    "" \
    "Model: ${MODEL} (${MODEL_PATH})" \
    "Thresholds: ${THRESHOLDS}" \
    "Logos: ${NUM_LOGOS}" \
    "Seed: ${SEED}" \
    "Max sim: ${USE_MAX_SIM}" \
    "Output: ${OUTPUT_FILE}" \
    ""

# Results header: shown on the console and used to start (truncate) the
# results file.
{
    printf '%s\n' \
        "============================================================" \
        "THRESHOLD OPTIMIZATION RESULTS" \
        "Model: ${MODEL} (${MODEL_PATH})" \
        "============================================================" \
        ""
    printf "%-10s %8s %8s %8s %8s %8s %8s\n" "Threshold" "TP" "FP" "FN" "Prec" "Recall" "F1"
    printf '%s\n' "--------------------------------------------------------------------"
} | tee "${OUTPUT_FILE}"
|
||||||
|
|
||||||
|
# Track best F1. BEST_F1 keeps the value as printed by the test script
# (with its trailing '%'); BEST_THRESHOLD stays empty until some threshold
# yields an F1 above 0.
BEST_F1=0
BEST_THRESHOLD=""

# Build extra args as an array so that "no extra flags" expands to zero words
# and no unquoted expansion is needed on the command line.
EXTRA_ARGS=()
if [[ "${USE_MAX_SIM}" == "true" ]]; then
    EXTRA_ARGS+=(--use-max-similarity)
fi

# Print the first integer on the first line of $1 that contains label $2.
# Prints nothing when the label is absent.
extract_count() {
    printf '%s\n' "$1" | grep -F -- "$2" | grep -oE "[0-9]+" | head -1
}

# Print the first percentage (e.g. "84.2%" or "100%") on the first line of $1
# that contains label $2. The fractional part is optional so that a value
# printed without decimals is still recognised.
# Prints nothing when the label is absent.
extract_percent() {
    printf '%s\n' "$1" | grep -F -- "$2" | grep -oE "[0-9]+(\.[0-9]+)?%" | head -1
}

# Succeed when $1 is numerically greater than $2.
# Returns 1 (without invoking awk) if either argument is not a plain
# non-negative decimal number, so an unparsed/empty F1 never wins.
f1_exceeds() {
    local numeric='^[0-9]+(\.[0-9]+)?$'
    [[ "$1" =~ ${numeric} && "$2" =~ ${numeric} ]] || return 1
    awk -v candidate="$1" -v best="$2" 'BEGIN { exit !(candidate + 0 > best + 0) }'
}

# Test each threshold.
# THRESHOLDS is deliberately left unquoted: it is a space-separated list.
# shellcheck disable=SC2086
for THRESHOLD in ${THRESHOLDS}; do
    # Run test and capture its stdout. stderr is discarded, as before, so the
    # exit status is checked explicitly; otherwise `set -e` would end the
    # script without any indication of what went wrong.
    STATUS=0
    OUTPUT=$(uv run python test_logo_detection.py \
        -n "${NUM_LOGOS}" \
        -s "${SEED}" \
        -t "${THRESHOLD}" \
        --refs-per-logo "${REFS_PER_LOGO}" \
        --margin "${MARGIN}" \
        --matching-method multi-ref \
        -e "${MODEL_PATH}" \
        "${EXTRA_ARGS[@]}" \
        2>/dev/null) || STATUS=$?
    if [[ "${STATUS}" -ne 0 ]]; then
        echo "Error: test_logo_detection.py exited with status ${STATUS} at threshold ${THRESHOLD}" >&2
        exit "${STATUS}"
    fi

    # Extract metrics from the labelled lines of the test output.
    TP=$(extract_count "${OUTPUT}" "True Positives")
    FP=$(extract_count "${OUTPUT}" "False Positives")
    FN=$(extract_count "${OUTPUT}" "False Negatives")
    PREC=$(extract_percent "${OUTPUT}" "Precision:")
    RECALL=$(extract_percent "${OUTPUT}" "Recall:")
    F1=$(extract_percent "${OUTPUT}" "F1 Score:")

    if [[ -z "${F1}" ]]; then
        echo "Warning: could not parse an F1 score for threshold ${THRESHOLD}" >&2
    fi

    # Print row; metrics that could not be parsed are shown as N/A.
    printf "%-10s %8s %8s %8s %8s %8s %8s\n" \
        "${THRESHOLD}" "${TP:-N/A}" "${FP:-N/A}" "${FN:-N/A}" \
        "${PREC:-N/A}" "${RECALL:-N/A}" "${F1:-N/A}" | tee -a "${OUTPUT_FILE}"

    # Track best F1. The comparison is strict, so the earliest threshold wins
    # a tie. The trailing '%' is stripped before comparing.
    if f1_exceeds "${F1%\%}" "${BEST_F1%\%}"; then
        BEST_F1="${F1}"
        BEST_THRESHOLD="${THRESHOLD}"
    fi
done
|
||||||
|
|
||||||
|
# Print the closing rule and the best threshold to the console and append
# them to OUTPUT_FILE, then tell the user where the results were saved.
# Reads BEST_THRESHOLD, BEST_F1 and OUTPUT_FILE.
# When no threshold produced an F1 above 0, BEST_THRESHOLD is empty; this is
# reported explicitly instead of printing a blank threshold.
print_summary() {
    echo "--------------------------------------------------------------------" | tee -a "${OUTPUT_FILE}"
    echo "" | tee -a "${OUTPUT_FILE}"
    if [[ -n "${BEST_THRESHOLD}" ]]; then
        echo "BEST THRESHOLD: ${BEST_THRESHOLD} (F1 = ${BEST_F1})"
    else
        echo "BEST THRESHOLD: none (no threshold produced an F1 score above 0)"
    fi | tee -a "${OUTPUT_FILE}"
    echo "" | tee -a "${OUTPUT_FILE}"
    echo "Results saved to: ${OUTPUT_FILE}"
}

print_summary
|
||||||
Reference in New Issue
Block a user