Add hybrid text+CLIP matching and image preprocessing

Hybrid matching combines text recognition with CLIP similarity:
- If reference logo has text and detection matches: lower CLIP threshold
- If reference has text but detection doesn't match: higher threshold
- If reference has no text: standard threshold

Image preprocessing adds letterbox/stretch modes for CLIP input to
preserve aspect ratio instead of center cropping.

New files:
- run_hybrid_test.sh: Test hybrid matching configurations
- run_preprocess_test.sh: Compare preprocessing modes

Changes to logo_detection_detr.py:
- Add preprocess_mode parameter (default/letterbox/stretch)
- Add set_text_detector() for hybrid matching
- Add extract_text() using EasyOCR
- Add compute_text_similarity() with fuzzy matching
- Add find_best_match_hybrid() with tiered thresholds

Changes to test_logo_detection.py:
- Add --matching-method hybrid option
- Add --preprocess-mode option
- Add hybrid threshold arguments
This commit is contained in:
Rick McEwen
2026-01-07 15:09:09 -05:00
parent 78f46f04bf
commit 49f982611a
4 changed files with 817 additions and 13 deletions

149
run_preprocess_test.sh Executable file
View File

@ -0,0 +1,149 @@
#!/bin/bash
#
# Test different image preprocessing modes to determine if they improve
# CLIP embedding accuracy for logo matching.
#
# Preprocessing modes tested:
# - default: CLIP's default (resize shortest edge + center crop)
# - letterbox: Pad to square with black bars, preserving aspect ratio
# - stretch: Stretch to square (distorts aspect ratio)
#
# Usage:
# ./run_preprocess_test.sh
#

# Fail fast on references to unset variables (a typo'd variable name would
# otherwise expand to "" and silently produce wrong paths/arguments).
set -u

# Absolute directory containing this script; all paths are resolved
# relative to it so the script works regardless of the caller's cwd.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT_FILE="${SCRIPT_DIR}/test_results/preprocessing_comparison.txt"

# Model - baseline CLIP (testing preprocessing effect on standard model)
readonly MODEL="openai/clip-vit-large-patch14"

# Fixed parameters (same as refs_per_logo test for comparability)
readonly NUM_LOGOS=20
readonly REFS_PER_LOGO=10
readonly POSITIVE_SAMPLES=20
readonly NEGATIVE_SAMPLES=100
readonly MIN_MATCHING_REFS=1
readonly THRESHOLD=0.70
readonly MARGIN=0.05
readonly SEED=42

# Preprocessing modes to test (space-separated; iterated by the main loop)
readonly MODES="default letterbox stretch"
# Make sure the results directory exists before any writes.
mkdir -p "${SCRIPT_DIR}/test_results"

# Start a fresh report: the grouped block truncates the file once and
# writes the whole header through a single redirection (replaces the
# original here-doc).
{
  printf '%s\n' "Image Preprocessing Comparison Test"
  printf '%s\n' "===================================="
  printf '%s\n' "Date: $(date)"
  printf '%s\n' "Model: ${MODEL}"
  printf '%s\n' "Method: multi-ref (max)"
  printf '%s\n' "Fixed Parameters:"
  printf '%s\n' "Number of logo brands: ${NUM_LOGOS}"
  printf '%s\n' "Refs per logo: ${REFS_PER_LOGO}"
  printf '%s\n' "Similarity threshold: ${THRESHOLD}"
  printf '%s\n' "Margin: ${MARGIN}"
  printf '%s\n' "Min matching refs: ${MIN_MATCHING_REFS}"
  printf '%s\n' "Positive samples/logo: ${POSITIVE_SAMPLES}"
  printf '%s\n' "Negative samples/logo: ${NEGATIVE_SAMPLES}"
  printf '%s\n' "Seed: ${SEED}"
  printf '%s\n' "Testing preprocessing modes: ${MODES}"
} > "$OUTPUT_FILE"

# Mirror a short banner on the console.
printf '%s\n' "Image Preprocessing Comparison Test"
printf '%s\n' "===================================="
printf '%s\n' "Model: ${MODEL}"
printf '%s\n' "Testing preprocessing modes: ${MODES}"
printf '\n'

# Append the summary-table header to the report.
{
  printf '%s\n' "Results Summary:"
  printf '%s\n' "----------------"
  printf "%-12s %8s %8s %8s %8s %8s %8s\n" "Mode" "TP" "FP" "FN" "Prec" "Recall" "F1"
  printf '%s\n' "------------------------------------------------------------------------"
} >> "$OUTPUT_FILE"

# Running best-F1 tracker, updated once per tested mode.
BEST_F1=0
BEST_MODE="default"
# Run one evaluation per preprocessing mode, scrape its summary metrics
# from stdout, and keep track of the best-scoring mode by F1.
for MODE in ${MODES}; do
  echo "=== Testing preprocess_mode=${MODE} ==="

  # Clear cache to ensure fresh embeddings with new preprocessing
  rm -f "${SCRIPT_DIR}/.embedding_cache.pkl"

  # Run the test and capture combined stdout/stderr so the metric lines
  # can be extracted below even if the tool logs to stderr.
  OUTPUT=$(uv run python "$SCRIPT_DIR/test_logo_detection.py" \
      --num-logos "$NUM_LOGOS" \
      --refs-per-logo "$REFS_PER_LOGO" \
      --positive-samples "$POSITIVE_SAMPLES" \
      --negative-samples "$NEGATIVE_SAMPLES" \
      --matching-method multi-ref \
      --min-matching-refs "$MIN_MATCHING_REFS" \
      --use-max-similarity \
      --threshold "$THRESHOLD" \
      --margin "$MARGIN" \
      --seed "$SEED" \
      --embedding-model "$MODEL" \
      --preprocess-mode "$MODE" \
      --no-cache \
      2>&1)

  # Extract metrics: first integer after each count label, first
  # percentage after each rate label. Empty if the label is missing.
  TP=$(echo "${OUTPUT}" | grep "True Positives" | grep -oE "[0-9]+" | head -1)
  FP=$(echo "${OUTPUT}" | grep "False Positives" | grep -oE "[0-9]+" | head -1)
  FN=$(echo "${OUTPUT}" | grep "False Negatives" | grep -oE "[0-9]+" | head -1)
  PREC=$(echo "${OUTPUT}" | grep "Precision:" | grep -oE "[0-9]+\.[0-9]+%" | head -1)
  RECALL=$(echo "${OUTPUT}" | grep "Recall:" | grep -oE "[0-9]+\.[0-9]+%" | head -1)
  F1=$(echo "${OUTPUT}" | grep "F1 Score:" | grep -oE "[0-9]+\.[0-9]+%" | head -1)

  # Print to console
  echo " TP: ${TP}, FP: ${FP}, FN: ${FN}"
  echo " Precision: ${PREC}, Recall: ${RECALL}, F1: ${F1}"
  echo ""

  # Add to results table
  printf "%-12s %8s %8s %8s %8s %8s %8s\n" "${MODE}" "${TP}" "${FP}" "${FN}" "${PREC}" "${RECALL}" "${F1}" >> "$OUTPUT_FILE"

  # Track best F1. awk does the float comparison: unlike bc it is
  # guaranteed present on any POSIX system (the old `bc ... || echo 0`
  # fallback silently disabled best-mode tracking when bc was missing).
  F1_NUM=$(echo "${F1}" | tr -d '%')
  if [ -n "$F1_NUM" ]; then
    BETTER=$(awk -v cur="$F1_NUM" -v best="$BEST_F1" 'BEGIN { print (cur + 0 > best + 0) ? 1 : 0 }')
    if [ "$BETTER" = "1" ]; then
      BEST_F1="${F1_NUM}"
      BEST_MODE="${MODE}"
    fi
  fi

  # Also append full output for this test
  echo "" >> "$OUTPUT_FILE"
  echo "======================================================================" >> "$OUTPUT_FILE"
  echo "DETAILED RESULTS: preprocess_mode=${MODE}" >> "$OUTPUT_FILE"
  echo "======================================================================" >> "$OUTPUT_FILE"
  echo "${OUTPUT}" | grep -A 50 "Configuration:" | head -30 >> "$OUTPUT_FILE"
  echo "" >> "$OUTPUT_FILE"
done
# Close out the report: table terminator, winning mode, and a legend
# explaining each preprocessing mode (single grouped append).
{
  echo "------------------------------------------------------------------------"
  echo ""
  echo "BEST PREPROCESSING MODE: ${BEST_MODE} (F1 = ${BEST_F1}%)"
  echo ""
  echo "Notes:"
  echo " - default: CLIP's standard preprocessing (resize shortest edge + center crop)"
  echo " - letterbox: Pads image to square with black bars, preserving aspect ratio"
  echo " - stretch: Resizes image to square, distorting aspect ratio"
  echo ""
} >> "$OUTPUT_FILE"

# Console summary banner pointing at the saved report.
printf '%s\n' "======================================="
printf '%s\n' "BEST: preprocess_mode=${BEST_MODE} (F1 = ${BEST_F1}%)"
printf '%s\n' "======================================="
printf '\n'
printf '%s\n' "Results saved to: $OUTPUT_FILE"