Add hybrid text+CLIP matching and image preprocessing
Hybrid matching combines text recognition with CLIP similarity:
- If the reference logo has text and the detection matches: lower CLIP threshold
- If the reference logo has text but the detection doesn't match: higher threshold
- If the reference logo has no text: standard threshold

Image preprocessing adds letterbox and stretch modes for CLIP input as alternatives to center cropping (letterbox preserves the aspect ratio; stretch distorts it).

New files:
- run_hybrid_test.sh: Test hybrid matching configurations
- run_preprocess_test.sh: Compare preprocessing modes

Changes to logo_detection_detr.py:
- Add preprocess_mode parameter (default/letterbox/stretch)
- Add set_text_detector() for hybrid matching
- Add extract_text() using EasyOCR
- Add compute_text_similarity() with fuzzy matching
- Add find_best_match_hybrid() with tiered thresholds

Changes to test_logo_detection.py:
- Add --matching-method hybrid option
- Add --preprocess-mode option
- Add hybrid threshold arguments
This commit is contained in:
149
run_preprocess_test.sh
Executable file
149
run_preprocess_test.sh
Executable file
@ -0,0 +1,149 @@
#!/bin/bash
#
# Test different image preprocessing modes to determine if they improve
# CLIP embedding accuracy for logo matching.
#
# Preprocessing modes tested:
#   - default:   CLIP's default (resize shortest edge + center crop)
#   - letterbox: Pad to square with black bars, preserving aspect ratio
#   - stretch:   Stretch to square (distorts aspect ratio)
#
# Usage:
#   ./run_preprocess_test.sh
#

# Absolute directory of this script; every other path is derived from it.
# Abort if it cannot be resolved: an empty value would make the later
# cache removal and mkdir operate relative to "/".
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" || {
  echo "ERROR: cannot determine the script directory" >&2
  exit 1
}
OUTPUT_FILE="${SCRIPT_DIR}/test_results/preprocessing_comparison.txt"

# Model - baseline CLIP (testing preprocessing effect on standard model)
MODEL="openai/clip-vit-large-patch14"

# Fixed parameters (same as refs_per_logo test for comparability)
NUM_LOGOS=20
REFS_PER_LOGO=10
POSITIVE_SAMPLES=20
NEGATIVE_SAMPLES=100
MIN_MATCHING_REFS=1
THRESHOLD=0.70
MARGIN=0.05
SEED=42

# Preprocessing modes to test (space-separated; iterated with word splitting)
MODES="default letterbox stretch"

# None of the settings above is reassigned later in the script.
readonly SCRIPT_DIR OUTPUT_FILE MODEL
readonly NUM_LOGOS REFS_PER_LOGO POSITIVE_SAMPLES NEGATIVE_SAMPLES
readonly MIN_MATCHING_REFS THRESHOLD MARGIN SEED MODES

# Create output directory if needed.
# ${SCRIPT_DIR:?} aborts the script when the variable is empty or unset, so
# the directory is never created relative to "/".
mkdir -p "${SCRIPT_DIR:?SCRIPT_DIR is not set}/test_results" || {
  echo "ERROR: cannot create ${SCRIPT_DIR}/test_results" >&2
  exit 1
}

# Clear output file and write header.
# The heredoc delimiter is unquoted on purpose: $(date) and the ${...}
# settings are expanded when the header is written.
if ! cat > "${OUTPUT_FILE:?OUTPUT_FILE is not set}" << EOF
Image Preprocessing Comparison Test
====================================
Date: $(date)

Model: ${MODEL}
Method: multi-ref (max)

Fixed Parameters:
Number of logo brands: ${NUM_LOGOS}
Refs per logo: ${REFS_PER_LOGO}
Similarity threshold: ${THRESHOLD}
Margin: ${MARGIN}
Min matching refs: ${MIN_MATCHING_REFS}
Positive samples/logo: ${POSITIVE_SAMPLES}
Negative samples/logo: ${NEGATIVE_SAMPLES}
Seed: ${SEED}

Testing preprocessing modes: ${MODES}

EOF
then
  echo "ERROR: cannot write ${OUTPUT_FILE}" >&2
  exit 1
fi

# Console banner
echo "Image Preprocessing Comparison Test"
echo "===================================="
echo "Model: ${MODEL}"
echo "Testing preprocessing modes: ${MODES}"
echo ""

# Results table header (one row per preprocessing mode is appended later)
{
  echo "Results Summary:"
  echo "----------------"
  printf "%-12s %8s %8s %8s %8s %8s %8s\n" "Mode" "TP" "FP" "FN" "Prec" "Recall" "F1"
  echo "------------------------------------------------------------------------"
} >> "$OUTPUT_FILE"

# Track best result: a mode replaces these only when its F1 is strictly higher.
BEST_F1=0
BEST_MODE="default"

# Print the first integer found on the stdin lines that contain label $1.
extract_count() {
  grep -- "$1" | grep -oE '[0-9]+' | head -1
}

# Print the first percentage found on the stdin lines that contain label $1.
# The decimal part is optional so that a value such as "100%" is accepted too.
extract_percent() {
  grep -- "$1" | grep -oE '[0-9]+(\.[0-9]+)?%' | head -1
}

# Succeed (exit 0) when number $1 is strictly greater than number $2.
# awk is used for the floating-point comparison; "+ 0" forces numeric context.
is_greater() {
  awk -v a="$1" -v b="$2" 'BEGIN { exit !(a + 0 > b + 0) }'
}

# Run the detection test once per preprocessing mode, record its metrics in
# the results table and remember the mode with the highest F1 score.
# MODES is intentionally unquoted: it is a space-separated list.
for MODE in ${MODES}; do
  echo "=== Testing preprocess_mode=${MODE} ==="

  # Clear cache to ensure fresh embeddings with new preprocessing.
  # ${SCRIPT_DIR:?} aborts rather than removing a file under "/".
  rm -f "${SCRIPT_DIR:?SCRIPT_DIR is not set}/.embedding_cache.pkl"

  # Run test and capture output (stdout and stderr) plus its exit status.
  RUN_STATUS=0
  OUTPUT=$(uv run python "${SCRIPT_DIR}/test_logo_detection.py" \
    --num-logos "${NUM_LOGOS}" \
    --refs-per-logo "${REFS_PER_LOGO}" \
    --positive-samples "${POSITIVE_SAMPLES}" \
    --negative-samples "${NEGATIVE_SAMPLES}" \
    --matching-method multi-ref \
    --min-matching-refs "${MIN_MATCHING_REFS}" \
    --use-max-similarity \
    --threshold "${THRESHOLD}" \
    --margin "${MARGIN}" \
    --seed "${SEED}" \
    --embedding-model "${MODEL}" \
    --preprocess-mode "${MODE}" \
    --no-cache \
    2>&1) || RUN_STATUS=$?

  # Best effort: a failed run is reported but does not stop the comparison.
  if [ "${RUN_STATUS}" -ne 0 ]; then
    echo "  WARNING: test command exited with status ${RUN_STATUS} for preprocess_mode=${MODE}" >&2
  fi

  # Extract metrics from the captured output
  TP=$(extract_count "True Positives" <<< "${OUTPUT}")
  FP=$(extract_count "False Positives" <<< "${OUTPUT}")
  FN=$(extract_count "False Negatives" <<< "${OUTPUT}")
  PREC=$(extract_percent "Precision:" <<< "${OUTPUT}")
  RECALL=$(extract_percent "Recall:" <<< "${OUTPUT}")
  F1=$(extract_percent "F1 Score:" <<< "${OUTPUT}")

  # Print to console ("n/a" marks a metric that could not be parsed)
  echo " TP: ${TP:-n/a}, FP: ${FP:-n/a}, FN: ${FN:-n/a}"
  echo " Precision: ${PREC:-n/a}, Recall: ${RECALL:-n/a}, F1: ${F1:-n/a}"
  echo ""

  # Add to results table
  printf "%-12s %8s %8s %8s %8s %8s %8s\n" \
    "${MODE}" "${TP:-n/a}" "${FP:-n/a}" "${FN:-n/a}" \
    "${PREC:-n/a}" "${RECALL:-n/a}" "${F1:-n/a}" \
    >> "${OUTPUT_FILE:?OUTPUT_FILE is not set}"

  # Track best F1 (strip the trailing "%" before comparing numerically)
  F1_NUM="${F1%\%}"
  if [ -n "${F1_NUM}" ] && is_greater "${F1_NUM}" "${BEST_F1}"; then
    BEST_F1="${F1_NUM}"
    BEST_MODE="${MODE}"
  fi

  # Also append full output for this test: the lines starting at
  # "Configuration:", capped at 30 lines by head.
  {
    echo ""
    echo "======================================================================"
    echo "DETAILED RESULTS: preprocess_mode=${MODE}"
    echo "======================================================================"
    if [ "${RUN_STATUS}" -ne 0 ]; then
      echo "NOTE: test command exited with status ${RUN_STATUS}"
    fi
    grep -A 50 "Configuration:" <<< "${OUTPUT}" | head -30
    echo ""
  } >> "${OUTPUT_FILE}"
done

# Summary: close the results table, then record the best mode and a legend
# describing each preprocessing mode.
{
  echo "------------------------------------------------------------------------"
  echo ""
  echo "BEST PREPROCESSING MODE: ${BEST_MODE} (F1 = ${BEST_F1}%)"
  echo ""
  echo "Notes:"
  echo " - default: CLIP's standard preprocessing (resize shortest edge + center crop)"
  echo " - letterbox: Pads image to square with black bars, preserving aspect ratio"
  echo " - stretch: Resizes image to square, distorting aspect ratio"
  echo ""
} >> "${OUTPUT_FILE:?OUTPUT_FILE is not set}"

# BEST_F1 starts at 0 and is only replaced by a strictly higher score, so a
# value of 0 here means no run produced a usable F1 and BEST_MODE is just the
# initial placeholder.
if [ "${BEST_F1}" = "0" ]; then
  echo "WARNING: no run reported an F1 score above 0; the best mode shown is only the initial default" >&2
fi

echo "======================================="
echo "BEST: preprocess_mode=${BEST_MODE} (F1 = ${BEST_F1}%)"
echo "======================================="
echo ""
echo "Results saved to: $OUTPUT_FILE"
Reference in New Issue
Block a user