Add embedding model selection and comparison test scripts

- Update DetectLogosDETR to support both CLIP and DINOv2 models
- Rename clip_model parameter to embedding_model
- Add model type detection for different embedding extraction
- DINOv2 uses CLS token, CLIP uses get_image_features()
- Add -e/--embedding-model argument to test_logo_detection.py
- Include model name in file output header
- Add run_threshold_tests.sh for testing various threshold/margin values
- Add run_model_comparison.sh for comparing CLIP vs DINOv2 models
2026-01-02 12:05:27 -05:00
parent a3008ee57f
commit 94db5bd40b
4 changed files with 312 additions and 30 deletions
--- a/run_threshold_tests.sh
+++ b/run_threshold_tests.sh
@@ -0,0 +1,141 @@
+#!/bin/bash
+#
+# Sweep test_logo_detection.py across six threshold/margin settings, all with
+# the multi-ref (max) matching method, pointing every run at one results file.
+# A failed run does not stop the sweep, but makes the script exit with status 1.
+# Setup errors such as an unwritable results file abort immediately (set -e).
+set -euo pipefail
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUTPUT_FILE="${SCRIPT_DIR}/threshold_test_results.txt"
+FAILED_TESTS=0  # number of test runs that exited non-zero
+
+# Common parameters
+NUM_LOGOS=20
+REFS_PER_LOGO=10
+POSITIVE_SAMPLES=20
+NEGATIVE_SAMPLES=100
+MIN_MATCHING_REFS=3
+SEED=42
+
+# Clear output file and write header
+echo "Threshold Optimization Tests" > "$OUTPUT_FILE"
+echo "=============================" >> "$OUTPUT_FILE"
+echo "Date: $(date)" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+echo "Common Parameters:" >> "$OUTPUT_FILE"
+echo "  Matching method: multi-ref (max)" >> "$OUTPUT_FILE"
+echo "  Reference logos: $NUM_LOGOS" >> "$OUTPUT_FILE"
+echo "  Refs per logo: $REFS_PER_LOGO" >> "$OUTPUT_FILE"
+echo "  Positive samples: $POSITIVE_SAMPLES" >> "$OUTPUT_FILE"
+echo "  Negative samples: $NEGATIVE_SAMPLES" >> "$OUTPUT_FILE"
+echo "  Min matching refs: $MIN_MATCHING_REFS" >> "$OUTPUT_FILE"
+echo "  Seed: $SEED" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+echo "Running threshold optimization tests..."
+echo "  Matching method: multi-ref (max)"
+echo "  Reference logos: $NUM_LOGOS"
+echo "  Refs per logo: $REFS_PER_LOGO"
+echo "  Seed: $SEED"
+echo ""
+
+# Test 1: Default parameters (baseline)
+echo "=== Test 1: Default parameters (threshold=0.70, margin=0.05) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.70 \
+    --margin 0.05 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+
+# Test 2: Higher threshold
+echo "=== Test 2: Higher threshold (threshold=0.80, margin=0.05) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.80 \
+    --margin 0.05 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+
+# Test 3: Higher threshold + larger margin
+echo "=== Test 3: Higher threshold + larger margin (threshold=0.80, margin=0.10) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.80 \
+    --margin 0.10 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+
+# Test 4: Very high threshold
+echo "=== Test 4: Very high threshold (threshold=0.85, margin=0.10) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.85 \
+    --margin 0.10 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+
+# Test 5: Very high threshold + large margin
+echo "=== Test 5: Strict parameters (threshold=0.85, margin=0.15) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.85 \
+    --margin 0.15 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+
+# Test 6: Maximum strictness
+echo "=== Test 6: Maximum strictness (threshold=0.90, margin=0.15) ==="
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos "$NUM_LOGOS" \
+    --refs-per-logo "$REFS_PER_LOGO" \
+    --positive-samples "$POSITIVE_SAMPLES" \
+    --negative-samples "$NEGATIVE_SAMPLES" \
+    --matching-method multi-ref \
+    --min-matching-refs "$MIN_MATCHING_REFS" \
+    --use-max-similarity \
+    --threshold 0.90 \
+    --margin 0.15 \
+    --seed "$SEED" \
+    --output-file "$OUTPUT_FILE" || FAILED_TESTS=$((FAILED_TESTS + 1))
+echo ""
+echo "Results saved to: $OUTPUT_FILE"
+
+# Surface failed runs; without this check the script would always exit 0.
+[[ "$FAILED_TESTS" -eq 0 ]] || { echo "ERROR: $FAILED_TESTS test run(s) failed" >&2; exit 1; }