Add hybrid text+CLIP matching and image preprocessing

Hybrid matching combines text recognition with CLIP similarity:
- If reference logo has text and detection matches: lower CLIP threshold
- If reference has text but detection doesn't match: higher threshold
- If reference has no text: standard threshold

Image preprocessing adds letterbox/stretch modes for CLIP input to preserve aspect ratio instead of center cropping.

New files:
- run_hybrid_test.sh: Test hybrid matching configurations
- run_preprocess_test.sh: Compare preprocessing modes

Changes to logo_detection_detr.py:
- Add preprocess_mode parameter (default/letterbox/stretch)
- Add set_text_detector() for hybrid matching
- Add extract_text() using EasyOCR
- Add compute_text_similarity() with fuzzy matching
- Add find_best_match_hybrid() with tiered thresholds

Changes to test_logo_detection.py:
- Add --matching-method hybrid option
- Add --preprocess-mode option
- Add hybrid threshold arguments
2026-01-07 15:09:09 -05:00
parent 78f46f04bf
commit 49f982611a
4 changed files with 817 additions and 13 deletions
--- a/run_hybrid_test.sh
+++ b/run_hybrid_test.sh
@@ -0,0 +1,168 @@
+#!/bin/bash
+#
+# Test the hybrid text+CLIP matching approach for logo detection.
+#
+# This approach uses text recognition to improve logo matching:
+# - If reference logo has text and detection matches it: use lower CLIP threshold
+# - If reference logo has text but detection doesn't match: use higher CLIP threshold
+# - If reference logo has no text: use standard CLIP threshold
+#
+# Runs one multi-ref baseline followed by four hybrid configurations. A
+# failing run does not stop the remaining runs; the script exits non-zero
+# at the end if any run failed.
+#
+# Usage:
+#   ./run_hybrid_test.sh
+#
+
+# Unset variables are errors and a failing pipeline stage fails the pipeline.
+# errexit (-e) is deliberately left off: failed runs are counted instead so
+# the remaining configurations still execute.
+set -uo pipefail
+
+# Print an error message to stderr and exit with status 1.
+die() {
+  printf 'ERROR: %s\n' "$*" >&2
+  exit 1
+}
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" \
+  || die "cannot resolve script directory"
+readonly SCRIPT_DIR
+readonly OUTPUT_DIR="${SCRIPT_DIR}/test_results"
+readonly OUTPUT_FILE="${OUTPUT_DIR}/hybrid_matching_results.txt"
+readonly TEST_SCRIPT="${SCRIPT_DIR}/test_logo_detection.py"
+
+# Model - baseline CLIP
+readonly MODEL="openai/clip-vit-large-patch14"
+
+# Fixed parameters
+readonly NUM_LOGOS=20
+readonly REFS_PER_LOGO=10
+readonly POSITIVE_SAMPLES=20
+readonly NEGATIVE_SAMPLES=100
+readonly SEED=42
+
+# Arguments shared by every run, split into a leading and a trailing group
+# so the matching options of each run can be placed between them.
+readonly -a LEADING_ARGS=(
+  --num-logos "$NUM_LOGOS"
+  --refs-per-logo "$REFS_PER_LOGO"
+  --positive-samples "$POSITIVE_SAMPLES"
+  --negative-samples "$NEGATIVE_SAMPLES"
+)
+readonly -a TRAILING_ARGS=(
+  --margin 0.05
+  --seed "$SEED"
+  --embedding-model "$MODEL"
+  --output-file "$OUTPUT_FILE"
+  --no-cache
+)
+
+failed_runs=0 # number of runs that exited non-zero
+
+#######################################
+# Run test_logo_detection.py once; count a failure if it exits non-zero.
+# Globals:   OUTPUT_FILE, TEST_SCRIPT, LEADING_ARGS, TRAILING_ARGS (read),
+#            failed_runs (written)
+# Arguments: $1 - title echoed to the console
+#            $2 - section label appended to OUTPUT_FILE
+#            remaining - matching options for this run
+# Returns:   0 always, so one failed run never aborts the sweep
+#######################################
+run_test() {
+  local title=$1 label=$2
+  shift 2
+
+  echo "=== ${title} ==="
+  echo "" >> "$OUTPUT_FILE"
+  echo "=== ${label} ===" >> "$OUTPUT_FILE"
+
+  if ! uv run python "$TEST_SCRIPT" \
+    "${LEADING_ARGS[@]}" "$@" "${TRAILING_ARGS[@]}"; then
+    printf 'ERROR: run failed: %s\n' "$title" >&2
+    failed_runs=$((failed_runs + 1))
+  fi
+  echo ""
+}
+
+#######################################
+# Run one hybrid-matching configuration with --threshold 0.70.
+# Arguments: $1 - title echoed to the console
+#            $2 - section label appended to OUTPUT_FILE
+#            $3 - value for --hybrid-text-threshold
+#            $4 - value for --hybrid-no-text-threshold
+#            $5 - value for --text-similarity-threshold
+#######################################
+run_hybrid() {
+  run_test "$1" "$2" \
+    --matching-method hybrid \
+    --threshold 0.70 \
+    --hybrid-text-threshold "$3" \
+    --hybrid-no-text-threshold "$4" \
+    --text-similarity-threshold "$5"
+}
+
+# Fail before truncating earlier results if the runs cannot possibly work.
+command -v uv > /dev/null || die "uv not found on PATH"
+[[ -f "$TEST_SCRIPT" ]] || die "missing ${TEST_SCRIPT}"
+
+# Create output directory if needed
+mkdir -p "$OUTPUT_DIR" || die "cannot create ${OUTPUT_DIR}"
+
+# Clear output file and write header
+cat > "$OUTPUT_FILE" << EOF || die "cannot write ${OUTPUT_FILE}"
+Hybrid Text+CLIP Matching Test Results
+======================================
+Date: $(date)
+
+Model: ${MODEL}
+
+Fixed Parameters:
+  Number of logo brands: ${NUM_LOGOS}
+  Refs per logo: ${REFS_PER_LOGO}
+  Positive samples/logo: ${POSITIVE_SAMPLES}
+  Negative samples/logo: ${NEGATIVE_SAMPLES}
+  Seed: ${SEED}
+
+EOF
+
+echo "Hybrid Text+CLIP Matching Test"
+echo "==============================="
+echo "Model: ${MODEL}"
+echo ""
+
+# Test 1: Multi-ref baseline to compare the hybrid runs against
+run_test "Test 1: Multi-ref baseline (for comparison)" \
+  "BASELINE: Multi-ref (max) at threshold 0.70" \
+  --matching-method multi-ref \
+  --min-matching-refs 1 \
+  --use-max-similarity \
+  --threshold 0.70
+
+# Test 2: Hybrid with default thresholds
+run_hybrid "Test 2: Hybrid with default thresholds" \
+  "HYBRID: default thresholds (0.70/0.60/0.80)" 0.60 0.80 0.5
+
+# Test 3: Hybrid with more aggressive text bonus
+run_hybrid "Test 3: Hybrid with lower text-match threshold" \
+  "HYBRID: aggressive text bonus (0.70/0.55/0.80)" 0.55 0.80 0.5
+
+# Test 4: Hybrid with stricter text mismatch penalty
+run_hybrid "Test 4: Hybrid with stricter text mismatch penalty" \
+  "HYBRID: strict mismatch (0.70/0.60/0.85)" 0.60 0.85 0.5
+
+# Test 5: Hybrid with lower text similarity threshold (more lenient OCR matching)
+run_hybrid "Test 5: Hybrid with lenient text matching" \
+  "HYBRID: lenient text matching (text_sim=0.4)" 0.60 0.80 0.4
+
+echo "======================================="
+if ((failed_runs > 0)); then
+  echo "Tests complete with ${failed_runs} failed run(s)!"
+else
+  echo "Tests complete!"
+fi
+echo "Results saved to: $OUTPUT_FILE"
+echo "======================================="
+# Non-zero exit status lets callers detect that at least one run failed.
+((failed_runs == 0)) || exit 1