Add hybrid text+CLIP matching and image preprocessing
Hybrid matching combines text recognition with CLIP similarity:
- If reference logo has text and detection matches: lower CLIP threshold
- If reference has text but detection doesn't match: higher threshold
- If reference has no text: standard threshold

Image preprocessing adds letterbox/stretch modes for CLIP input to preserve aspect ratio instead of center cropping.

New files:
- run_hybrid_test.sh: Test hybrid matching configurations
- run_preprocess_test.sh: Compare preprocessing modes

Changes to logo_detection_detr.py:
- Add preprocess_mode parameter (default/letterbox/stretch)
- Add set_text_detector() for hybrid matching
- Add extract_text() using EasyOCR
- Add compute_text_similarity() with fuzzy matching
- Add find_best_match_hybrid() with tiered thresholds

Changes to test_logo_detection.py:
- Add --matching-method hybrid option
- Add --preprocess-mode option
- Add hybrid threshold arguments
This commit is contained in:
168
run_hybrid_test.sh
Executable file
168
run_hybrid_test.sh
Executable file
@ -0,0 +1,168 @@
|
||||
#!/bin/bash
#
# Test the hybrid text+CLIP matching approach for logo detection.
#
# This approach uses text recognition to improve logo matching:
# - If reference logo has text and detection matches it: use lower CLIP threshold
# - If reference logo has text but detection doesn't match: use higher CLIP threshold
# - If reference logo has no text: use standard CLIP threshold
#
# Runs one multi-ref baseline and four hybrid configurations through
# test_logo_detection.py. Every run receives the same results file via
# --output-file, and this script writes a header and a section heading per
# configuration into that file.
#
# Usage:
#   ./run_hybrid_test.sh
#
# Requires: uv on PATH.
# Exit status: 0 if every configuration ran successfully, 1 otherwise.
#

# Deliberately no `set -e`: one failing configuration must not abort the
# remaining ones. run_test records failures and they are reported at the end.
set -uo pipefail

# Print an error message to stderr and abort the script.
die() {
  printf 'ERROR: %s\n' "$*" >&2
  exit 1
}

# Resolve the directory containing this script. Abort on failure so that the
# paths below can never silently collapse to "/test_results".
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" \
  || die "cannot determine script directory"
readonly SCRIPT_DIR
readonly RESULTS_DIR="${SCRIPT_DIR}/test_results"
readonly OUTPUT_FILE="${RESULTS_DIR}/hybrid_matching_results.txt"

# Model - baseline CLIP
readonly MODEL="openai/clip-vit-large-patch14"

# Fixed parameters
readonly NUM_LOGOS=20
readonly REFS_PER_LOGO=10
readonly POSITIVE_SAMPLES=20
readonly NEGATIVE_SAMPLES=100
readonly SEED=42

# Titles of configurations whose run exited non-zero.
declare -a FAILED_TESTS=()

#######################################
# Run one test_logo_detection.py configuration.
# Globals:
#   SCRIPT_DIR, OUTPUT_FILE, MODEL, NUM_LOGOS, REFS_PER_LOGO,
#   POSITIVE_SAMPLES, NEGATIVE_SAMPLES, SEED (read)
#   FAILED_TESTS (appended to when the run fails)
# Arguments:
#   $1   - title echoed to the console
#   $2   - section heading appended to the results file
#   $3.. - configuration-specific options, inserted between the fixed
#          dataset options and the fixed trailing options so the argument
#          order matches the original hand-written invocations
# Outputs:
#   Progress lines on stdout, an error line on stderr when the run fails.
# Returns:
#   0 always; failures are collected in FAILED_TESTS.
#######################################
run_test() {
  local console_title=$1
  local file_heading=$2
  shift 2

  echo "=== ${console_title} ==="
  echo "" >> "$OUTPUT_FILE"
  echo "=== ${file_heading} ===" >> "$OUTPUT_FILE"

  if ! uv run python "$SCRIPT_DIR/test_logo_detection.py" \
    --num-logos "$NUM_LOGOS" \
    --refs-per-logo "$REFS_PER_LOGO" \
    --positive-samples "$POSITIVE_SAMPLES" \
    --negative-samples "$NEGATIVE_SAMPLES" \
    "$@" \
    --margin 0.05 \
    --seed "$SEED" \
    --embedding-model "$MODEL" \
    --output-file "$OUTPUT_FILE" \
    --no-cache; then
    printf 'ERROR: run failed: %s\n' "$console_title" >&2
    FAILED_TESTS+=("$console_title")
  fi

  echo ""
}

# Fail before touching the previous results file if the runner is missing.
command -v uv > /dev/null 2>&1 || die "uv not found on PATH"

# Create output directory if needed
mkdir -p "$RESULTS_DIR" || die "cannot create ${RESULTS_DIR}"

# Clear output file and write header
cat > "$OUTPUT_FILE" << EOF || die "cannot write ${OUTPUT_FILE}"
Hybrid Text+CLIP Matching Test Results
======================================
Date: $(date)

Model: ${MODEL}

Fixed Parameters:
Number of logo brands: ${NUM_LOGOS}
Refs per logo: ${REFS_PER_LOGO}
Positive samples/logo: ${POSITIVE_SAMPLES}
Negative samples/logo: ${NEGATIVE_SAMPLES}
Seed: ${SEED}

EOF

echo "Hybrid Text+CLIP Matching Test"
echo "==============================="
echo "Model: ${MODEL}"
echo ""

# Test 1: Compare hybrid vs multi-ref baseline
run_test \
  "Test 1: Multi-ref baseline (for comparison)" \
  "BASELINE: Multi-ref (max) at threshold 0.70" \
  --matching-method multi-ref \
  --min-matching-refs 1 \
  --use-max-similarity \
  --threshold 0.70

# Test 2: Hybrid with default thresholds
run_test \
  "Test 2: Hybrid with default thresholds" \
  "HYBRID: default thresholds (0.70/0.60/0.80)" \
  --matching-method hybrid \
  --threshold 0.70 \
  --hybrid-text-threshold 0.60 \
  --hybrid-no-text-threshold 0.80 \
  --text-similarity-threshold 0.5

# Test 3: Hybrid with more aggressive text bonus
run_test \
  "Test 3: Hybrid with lower text-match threshold" \
  "HYBRID: aggressive text bonus (0.70/0.55/0.80)" \
  --matching-method hybrid \
  --threshold 0.70 \
  --hybrid-text-threshold 0.55 \
  --hybrid-no-text-threshold 0.80 \
  --text-similarity-threshold 0.5

# Test 4: Hybrid with stricter text mismatch penalty
run_test \
  "Test 4: Hybrid with stricter text mismatch penalty" \
  "HYBRID: strict mismatch (0.70/0.60/0.85)" \
  --matching-method hybrid \
  --threshold 0.70 \
  --hybrid-text-threshold 0.60 \
  --hybrid-no-text-threshold 0.85 \
  --text-similarity-threshold 0.5

# Test 5: Hybrid with lower text similarity threshold (more lenient OCR matching)
run_test \
  "Test 5: Hybrid with lenient text matching" \
  "HYBRID: lenient text matching (text_sim=0.4)" \
  --matching-method hybrid \
  --threshold 0.70 \
  --hybrid-text-threshold 0.60 \
  --hybrid-no-text-threshold 0.80 \
  --text-similarity-threshold 0.4

echo "======================================="
if ((${#FAILED_TESTS[@]} == 0)); then
  echo "Tests complete!"
else
  # Surface failures instead of claiming success after a broken run.
  echo "Tests complete with ${#FAILED_TESTS[@]} failure(s):"
  for failed_title in "${FAILED_TESTS[@]}"; do
    echo "  - ${failed_title}"
  done
fi
echo "Results saved to: $OUTPUT_FILE"
echo "======================================="

((${#FAILED_TESTS[@]} == 0)) || exit 1
|
||||
Reference in New Issue
Block a user