Add hybrid text+CLIP matching and image preprocessing

Hybrid matching combines text recognition with CLIP similarity:
- If the reference logo has text and the detection's text matches it: use a lower CLIP threshold
- If the reference logo has text but the detection's text doesn't match: use a higher CLIP threshold
- If the reference logo has no text: use the standard CLIP threshold

Image preprocessing adds letterbox/stretch modes for CLIP input to
preserve aspect ratio instead of center cropping.

New files:
- run_hybrid_test.sh: Test hybrid matching configurations
- run_preprocess_test.sh: Compare preprocessing modes

Changes to logo_detection_detr.py:
- Add preprocess_mode parameter (default/letterbox/stretch)
- Add set_text_detector() for hybrid matching
- Add extract_text() using EasyOCR
- Add compute_text_similarity() with fuzzy matching
- Add find_best_match_hybrid() with tiered thresholds

Changes to test_logo_detection.py:
- Add --matching-method hybrid option
- Add --preprocess-mode option
- Add hybrid threshold arguments
This commit is contained in:
Rick McEwen
2026-01-07 15:09:09 -05:00
parent 78f46f04bf
commit 49f982611a
4 changed files with 817 additions and 13 deletions

168
run_hybrid_test.sh Executable file
View File

@ -0,0 +1,168 @@
#!/bin/bash
#
# Test the hybrid text+CLIP matching approach for logo detection.
#
# This approach uses text recognition to improve logo matching:
# - If reference logo has text and detection matches it: use lower CLIP threshold
# - If reference logo has text but detection doesn't match: use higher CLIP threshold
# - If reference logo has no text: use standard CLIP threshold
#
# Runs one multi-ref baseline and four hybrid configurations through
# test_logo_detection.py (via `uv run`), writing a header plus one labelled
# section per run to test_results/hybrid_matching_results.txt next to this
# script.
#
# Usage:
#   ./run_hybrid_test.sh
#
# Exit status:
#   0 if every run succeeded, 1 if setup failed or any run exited non-zero.
#   A failing run does not stop the remaining runs.
#

# No `set -e` on purpose: a failed run must not abort the later runs. Each
# critical command is checked explicitly instead.
set -uo pipefail

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || exit 1
readonly SCRIPT_DIR
readonly RESULTS_DIR="${SCRIPT_DIR}/test_results"
readonly OUTPUT_FILE="${RESULTS_DIR}/hybrid_matching_results.txt"

# Model - baseline CLIP
readonly MODEL="openai/clip-vit-large-patch14"

# Fixed parameters shared by every run
readonly NUM_LOGOS=20
readonly REFS_PER_LOGO=10
readonly POSITIVE_SAMPLES=20
readonly NEGATIVE_SAMPLES=100
readonly SEED=42
readonly THRESHOLD=0.70
readonly MARGIN=0.05

# Bookkeeping for runs that exited non-zero (filled in by run_test).
FAILED_COUNT=0
FAILED_RUNS=()

#######################################
# Print a message to stderr and exit with status 1.
# Arguments: $* - message text
#######################################
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

#######################################
# Truncate the results file and write the report header.
# Globals:   OUTPUT_FILE, MODEL, NUM_LOGOS, REFS_PER_LOGO, POSITIVE_SAMPLES,
#            NEGATIVE_SAMPLES, SEED (all read)
# Returns:   status of the write to OUTPUT_FILE
#######################################
write_header() {
  cat > "$OUTPUT_FILE" << EOF
Hybrid Text+CLIP Matching Test Results
======================================
Date: $(date)
Model: ${MODEL}
Fixed Parameters:
Number of logo brands: ${NUM_LOGOS}
Refs per logo: ${REFS_PER_LOGO}
Positive samples/logo: ${POSITIVE_SAMPLES}
Negative samples/logo: ${NEGATIVE_SAMPLES}
Seed: ${SEED}
EOF
}

#######################################
# Run test_logo_detection.py once with the shared fixed parameters plus the
# run-specific options, and record a failure if it exits non-zero.
# Globals:   SCRIPT_DIR, OUTPUT_FILE, MODEL, NUM_LOGOS, REFS_PER_LOGO,
#            POSITIVE_SAMPLES, NEGATIVE_SAMPLES, SEED, MARGIN (read);
#            FAILED_COUNT, FAILED_RUNS (written)
# Arguments: $1  - banner printed to the console
#            $2  - section label appended to OUTPUT_FILE
#            $3+ - run-specific options, inserted between the sampling
#                  options and the trailing --margin/--seed/... options
# Returns:   0 always; failures are accumulated for the final summary
#######################################
run_test() {
  local banner=$1
  local label=$2
  shift 2
  local status=0

  echo "=== ${banner} ==="
  printf '\n=== %s ===\n' "$label" >> "$OUTPUT_FILE"

  uv run python "${SCRIPT_DIR}/test_logo_detection.py" \
    --num-logos "$NUM_LOGOS" \
    --refs-per-logo "$REFS_PER_LOGO" \
    --positive-samples "$POSITIVE_SAMPLES" \
    --negative-samples "$NEGATIVE_SAMPLES" \
    "$@" \
    --margin "$MARGIN" \
    --seed "$SEED" \
    --embedding-model "$MODEL" \
    --output-file "$OUTPUT_FILE" \
    --no-cache || status=$?

  if (( status != 0 )); then
    printf 'ERROR: %s failed (exit %d)\n' "$banner" "$status" >&2
    # Mark the section so a reader of the results file can tell that missing
    # numbers mean a failed run rather than an empty result.
    printf 'RUN FAILED (exit %d)\n' "$status" >> "$OUTPUT_FILE"
    FAILED_COUNT=$((FAILED_COUNT + 1))
    FAILED_RUNS+=("$banner")
  fi
  echo ""
}

#######################################
# Run one hybrid-matching configuration.
# NOTE(review): the run labels treat --hybrid-no-text-threshold as the
# threshold applied on a text mismatch — confirm against
# test_logo_detection.py.
# Arguments: $1 - console banner
#            $2 - section label for OUTPUT_FILE
#            $3 - value for --hybrid-text-threshold
#            $4 - value for --hybrid-no-text-threshold
#            $5 - value for --text-similarity-threshold
#######################################
run_hybrid() {
  local banner=$1
  local label=$2
  local text_threshold=$3
  local no_text_threshold=$4
  local text_similarity=$5

  run_test "$banner" "$label" \
    --matching-method hybrid \
    --threshold "$THRESHOLD" \
    --hybrid-text-threshold "$text_threshold" \
    --hybrid-no-text-threshold "$no_text_threshold" \
    --text-similarity-threshold "$text_similarity"
}

main() {
  # Fail fast instead of producing five "command not found" errors.
  command -v uv > /dev/null || die "uv not found on PATH"

  # Create output directory if needed
  mkdir -p "$RESULTS_DIR" || die "cannot create ${RESULTS_DIR}"

  # Clear output file and write header
  write_header || die "cannot write ${OUTPUT_FILE}"

  echo "Hybrid Text+CLIP Matching Test"
  echo "==============================="
  echo "Model: ${MODEL}"
  echo ""

  # Test 1: Compare hybrid vs multi-ref baseline
  run_test \
    "Test 1: Multi-ref baseline (for comparison)" \
    "BASELINE: Multi-ref (max) at threshold 0.70" \
    --matching-method multi-ref \
    --min-matching-refs 1 \
    --use-max-similarity \
    --threshold "$THRESHOLD"

  # Test 2: Hybrid with default thresholds
  run_hybrid \
    "Test 2: Hybrid with default thresholds" \
    "HYBRID: default thresholds (0.70/0.60/0.80)" \
    0.60 0.80 0.5

  # Test 3: Hybrid with more aggressive text bonus
  run_hybrid \
    "Test 3: Hybrid with lower text-match threshold" \
    "HYBRID: aggressive text bonus (0.70/0.55/0.80)" \
    0.55 0.80 0.5

  # Test 4: Hybrid with stricter text mismatch penalty
  run_hybrid \
    "Test 4: Hybrid with stricter text mismatch penalty" \
    "HYBRID: strict mismatch (0.70/0.60/0.85)" \
    0.60 0.85 0.5

  # Test 5: Hybrid with lower text similarity threshold (more lenient OCR matching)
  run_hybrid \
    "Test 5: Hybrid with lenient text matching" \
    "HYBRID: lenient text matching (text_sim=0.4)" \
    0.60 0.80 0.4

  echo "======================================="
  if (( FAILED_COUNT > 0 )); then
    printf 'Tests finished with %d failed run(s):\n' "$FAILED_COUNT" >&2
    printf '  - %s\n' "${FAILED_RUNS[@]}" >&2
  else
    echo "Tests complete!"
  fi
  echo "Results saved to: $OUTPUT_FILE"
  echo "======================================="

  if (( FAILED_COUNT > 0 )); then
    exit 1
  fi
}

main "$@"