# logo_test/analyze_similarity_distribution.sh

#!/bin/bash
#
# Analyze similarity distribution for baseline and fine-tuned models.
#
# This script runs the test with --similarity-details to output detailed
# statistics about how the models score matches vs non-matches.
#
# Usage:
#   ./analyze_similarity_distribution.sh
#   ./analyze_similarity_distribution.sh --model finetuned
#   ./analyze_similarity_distribution.sh --model baseline
#

# Strict mode:
#   -e          abort on an unhandled command failure
#   -u          treat references to unset variables as errors
#   -o pipefail make a pipeline fail when ANY stage fails; without it the
#               status of `test | tee file` is tee's, which hides test failures
set -euo pipefail

# Default parameters. Each one can be overridden from the environment,
# e.g. `NUM_LOGOS=100 MARGIN=0.1 ./analyze_similarity_distribution.sh`.
NUM_LOGOS="${NUM_LOGOS:-50}"
SEED="${SEED:-42}"
THRESHOLD="${THRESHOLD:-0.75}"
REFS_PER_LOGO="${REFS_PER_LOGO:-3}"
MARGIN="${MARGIN:-0.05}"
# Which model(s) to analyze: 'baseline', 'finetuned', or 'both'.
MODEL="${MODEL:-both}"

# Model paths
BASELINE_MODEL="openai/clip-vit-large-patch14"
FINETUNED_MODEL="models/logo_detection/clip_finetuned"

# Output directory
OUTPUT_DIR="similarity_analysis"
# Computed once so every output file of a single invocation shares one stamp.
TIMESTAMP="$(date +%Y%m%d_%H%M%S)"

# Parse command line arguments

# Report an option that was given without its value and abort.
# Arguments: $1 - the option exactly as the user typed it
missing_value() {
    echo "Error: option $1 requires a value" >&2
    exit 1
}

# Parse the command line into the global settings
# (NUM_LOGOS, SEED, THRESHOLD, MODEL, FINETUNED_MODEL).
# Arguments: the script's positional parameters ("$@")
# Exits:     0 after printing help; 1 on an unknown option, an option that is
#            missing its value, or an invalid --model value.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            -n|--num-logos)
                # Check before `shift 2`: with only one argument left the
                # shift would fail and `set -e` would kill the script silently.
                [[ $# -ge 2 ]] || missing_value "$1"
                NUM_LOGOS="$2"
                shift 2
                ;;
            -s|--seed)
                [[ $# -ge 2 ]] || missing_value "$1"
                SEED="$2"
                shift 2
                ;;
            -t|--threshold)
                [[ $# -ge 2 ]] || missing_value "$1"
                THRESHOLD="$2"
                shift 2
                ;;
            --model)
                [[ $# -ge 2 ]] || missing_value "$1"
                # Reject typos up front; an unrecognized value would otherwise
                # match none of the later model checks and run nothing.
                case "$2" in
                    baseline|finetuned|both)
                        MODEL="$2"
                        ;;
                    *)
                        echo "Error: invalid --model value '$2' (expected 'baseline', 'finetuned', or 'both')" >&2
                        exit 1
                        ;;
                esac
                shift 2
                ;;
            --finetuned-path)
                [[ $# -ge 2 ]] || missing_value "$1"
                FINETUNED_MODEL="$2"
                shift 2
                ;;
            -h|--help)
                echo "Usage: $0 [OPTIONS]"
                echo ""
                echo "Options:"
                echo "  -n, --num-logos NUM     Number of logos to test (default: 50)"
                echo "  -s, --seed SEED         Random seed (default: 42)"
                echo "  -t, --threshold VAL     Similarity threshold (default: 0.75)"
                echo "  --model MODEL           Which model: 'baseline', 'finetuned', or 'both' (default: both)"
                echo "  --finetuned-path PATH   Path to fine-tuned model"
                echo "  -h, --help              Show this help message"
                exit 0
                ;;
            *)
                # Diagnostics belong on stderr so they are not mixed into
                # captured output.
                echo "Unknown option: $1" >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Make sure the results directory exists before anything is written to it.
mkdir -p "${OUTPUT_DIR}"

# Print the run banner and the effective parameters, one line per argument.
banner_rule="============================================================"
printf '%s\n' \
    "${banner_rule}" \
    "SIMILARITY DISTRIBUTION ANALYSIS" \
    "${banner_rule}" \
    "" \
    "Parameters:" \
    "  Number of logos: ${NUM_LOGOS}" \
    "  Random seed:     ${SEED}" \
    "  Threshold:       ${THRESHOLD}" \
    "  Refs per logo:   ${REFS_PER_LOGO}" \
    "  Margin:          ${MARGIN}" \
    "  Model:           ${MODEL}" \
    ""

# Arguments shared by every test invocation, kept in an array so each value
# stays a single word regardless of its content.
TEST_ARGS=()
TEST_ARGS+=(-n "${NUM_LOGOS}" -s "${SEED}" -t "${THRESHOLD}")
TEST_ARGS+=(--refs-per-logo "${REFS_PER_LOGO}" --margin "${MARGIN}")
TEST_ARGS+=(--matching-method multi-ref)
TEST_ARGS+=(--similarity-details --clear-cache)

#######################################
# Run the logo detection test for one model and save its combined output.
# Globals:   OUTPUT_DIR, TIMESTAMP, TEST_ARGS (read)
# Arguments: $1 - label for the model, used in the output file name
#            $2 - model identifier/path handed to the test via -e
# Outputs:   the test's stdout+stderr, shown on stdout and written to
#            ${OUTPUT_DIR}/<label>_similarity_${TIMESTAMP}.txt
# Returns:   0 on success; otherwise the failing pipeline status
#######################################
run_analysis() {
    local model_name="$1"
    local model_path="$2"
    local output_file="${OUTPUT_DIR}/${model_name}_similarity_${TIMESTAMP}.txt"
    local status=0

    echo "============================================================"
    echo "Analyzing: ${model_name}"
    echo "Model:     ${model_path}"
    echo "Output:    ${output_file}"
    echo "============================================================"
    echo ""

    # Without pipefail the pipeline's status is tee's, so a crashed test would
    # be reported as success. The subshell keeps the option change local, and
    # `|| status=$?` captures the failure instead of letting `set -e` abort
    # before we can report it.
    (
        set -o pipefail
        uv run python test_logo_detection.py \
            "${TEST_ARGS[@]}" \
            -e "${model_path}" \
            2>&1 | tee "${output_file}"
    ) || status=$?

    if [[ "${status}" -ne 0 ]]; then
        echo "" >&2
        echo "Error: analysis of ${model_name} failed (exit status ${status})." >&2
        echo "Partial output saved to: ${output_file}" >&2
        return "${status}"
    fi

    echo ""
    echo "Results saved to: ${output_file}"
    echo ""
}

# Run analysis based on model selection

# MODEL may also come from the environment, so validate it here: an
# unrecognized value would match neither branch below and the script would
# claim success without having run anything.
case "${MODEL}" in
    baseline|finetuned|both)
        ;;
    *)
        echo "Error: unknown model selection '${MODEL}' (expected 'baseline', 'finetuned', or 'both')" >&2
        exit 1
        ;;
esac

# Number of analyses actually executed; used to avoid a false "complete".
analyses_run=0

if [[ "${MODEL}" == "baseline" || "${MODEL}" == "both" ]]; then
    run_analysis "baseline" "${BASELINE_MODEL}"
    analyses_run=$((analyses_run + 1))
fi

if [[ "${MODEL}" == "finetuned" || "${MODEL}" == "both" ]]; then
    if [[ -d "${FINETUNED_MODEL}" ]]; then
        run_analysis "finetuned" "${FINETUNED_MODEL}"
        analyses_run=$((analyses_run + 1))
    else
        # Best-effort: a missing fine-tuned model does not invalidate a
        # baseline run, so only warn here (on stderr).
        echo "Warning: Fine-tuned model not found at ${FINETUNED_MODEL}" >&2
        echo "Skipping fine-tuned model analysis." >&2
    fi
fi

# E.g. MODEL=finetuned with the model directory missing: nothing was analyzed,
# so report failure rather than "Analysis complete!".
if [[ "${analyses_run}" -eq 0 ]]; then
    echo "Error: no analysis was run." >&2
    exit 1
fi

echo "============================================================"
echo "Analysis complete!"
echo "Results saved to: ${OUTPUT_DIR}/"
echo "============================================================"