Add margin check to multi-ref matching to reduce false positives

The multi-ref matching method was missing a margin check against other logos, causing excessive false positives.

This fix:
- Adds a margin parameter to find_best_match_multi_ref() that requires the best logo's score to exceed the second-best by a minimum margin
- Passes --margin from the test script to both matching methods
- Updates the documentation to reflect that the margin applies to both methods

Also adds run_comparison_tests.sh to run all three matching configurations (margin, multi-ref with mean similarity, multi-ref with max similarity) and compare results.
2025-12-31 11:23:47 -05:00
parent ddccf653d2
commit 197e007591
5 changed files with 120 additions and 29 deletions
--- a/logo_detection_detr.py
+++ b/logo_detection_detr.py
@ -401,6 +401,7 @@ class DetectLogosDETR:
        similarity_threshold: float = 0.85,
        min_matching_refs: int = 1,
        use_mean_similarity: bool = True,
+        margin: float = 0.0,
    ) -> Optional[Tuple[str, float, int]]:
        """
        Find the best matching reference logo using multiple reference embeddings per logo.
@ -414,6 +415,7 @@ class DetectLogosDETR:
            similarity_threshold: Minimum similarity to consider a match (0-1)
            min_matching_refs: Minimum number of references that must match above threshold
            use_mean_similarity: If True, use mean similarity across all refs; if False, use max
+            margin: Required margin between best and second-best logo scores (0-1)

        Returns:
            Tuple of (label, similarity, num_matching_refs) for best match,
@ -422,9 +424,8 @@ class DetectLogosDETR:
        if not reference_embeddings:
            return None

-        best_score = -1.0
-        best_label = None
-        best_num_matches = 0
+        # Calculate scores for all logos that meet the min_matching_refs requirement
+        logo_scores = []

        for label, ref_embedding_list in reference_embeddings.items():
            if not ref_embedding_list:
@ -445,17 +446,30 @@ class DetectLogosDETR:
            else:
                score = max(similarities)

-            # Check if this logo meets the minimum matching refs requirement
-            if num_matches >= min_matching_refs and score > best_score:
-                best_score = score
-                best_label = label
-                best_num_matches = num_matches
+            # Only consider logos that meet the minimum matching refs requirement
+            if num_matches >= min_matching_refs:
+                logo_scores.append((label, score, num_matches))

-        if best_label is not None and best_score >= similarity_threshold:
-            return (best_label, best_score, best_num_matches)
-        else:
+        if not logo_scores:
            return None

+        # Sort by score descending
+        logo_scores.sort(key=lambda x: x[1], reverse=True)
+
+        best_label, best_score, best_num_matches = logo_scores[0]
+
+        # Check if best score meets threshold
+        if best_score < similarity_threshold:
+            return None
+
+        # Check margin against second-best logo (if exists)
+        if margin > 0 and len(logo_scores) > 1:
+            second_best_score = logo_scores[1][1]
+            if best_score - second_best_score < margin:
+                return None  # Not confident enough
+
+        return (best_label, best_score, best_num_matches)
+
    def find_best_match_with_margin(
        self,
        detected_embedding: torch.Tensor,
--- a/logo_detection_detr_usage.md
+++ b/logo_detection_detr_usage.md
@ -143,7 +143,8 @@ match = detector.find_best_match_multi_ref(
    reference_embeddings,       # Dict: logo_name -> list of embeddings
    similarity_threshold=0.85,
    min_matching_refs=1,
-    use_mean_similarity=True
+    use_mean_similarity=True,
+    margin=0.05
 )
 # Returns: (label, similarity, num_matching_refs) or None
 ```
@ -154,9 +155,13 @@ match = detector.find_best_match_multi_ref(
 - `similarity_threshold`: Minimum similarity to consider a match (0-1, default: 0.85)
 - `min_matching_refs`: Minimum number of references that must match above threshold (default: 1)
 - `use_mean_similarity`: If True, use mean similarity; if False, use max (default: True)
+- `margin`: Required difference between best and second-best logo scores; 0 disables the margin check (default: 0.0)

 **Returns:**
- Tuple of (label, similarity, num_matching_refs) for best match, or None if no match meets criteria
+- Tuple of (label, similarity, num_matching_refs) for best match, or None if:
+  - No logo meets the min_matching_refs requirement, OR
+  - Best score is below threshold, OR
+  - Best score doesn't exceed second-best by the required margin

 **Example:**
 ```python
@ -171,7 +176,8 @@ match = detector.find_best_match_multi_ref(
    multi_ref_embeddings,
    similarity_threshold=0.80,
    min_matching_refs=2,        # At least 2 refs must match
-    use_mean_similarity=True    # Average across all refs
+    use_mean_similarity=True,   # Average across all refs
+    margin=0.05                 # Require 0.05 margin over second-best logo
 )

 if match:
--- a/logo_detection_test_methodology.md
+++ b/logo_detection_test_methodology.md
@ -40,12 +40,7 @@ The system uses a two-stage pipeline:
 | Parameter | Default | Description |
 |-----------|---------|-------------|
 | `--matching-method` | margin | Matching method: `margin` or `multi-ref` |
-
-#### Margin Method Parameters (when `--matching-method margin`)
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| `--margin` | 0.05 | Required margin between best and second-best match |
+| `--margin` | 0.05 | Required margin between best and second-best match (applies to both methods) |

 #### Multi-Ref Method Parameters (when `--matching-method multi-ref`)

@ -275,8 +270,8 @@ Matched Logo Labels

 | Goal | Adjustments |
 |------|-------------|
-| **Reduce false positives** | Increase `--threshold`, increase `--min-matching-refs`, use mean similarity |
-| **Reduce false negatives** | Decrease `--threshold`, decrease `--min-matching-refs`, use `--use-max-similarity` |
+| **Reduce false positives** | Increase `--threshold`, increase `--margin`, increase `--min-matching-refs`, use mean similarity |
+| **Reduce false negatives** | Decrease `--threshold`, decrease `--margin`, decrease `--min-matching-refs`, use `--use-max-similarity` |

 ### General Tuning

@ -295,13 +290,13 @@ Matched Logo Labels
 # Default margin-based matching
 python test_logo_detection.py -n 20 --threshold 0.75 --margin 0.05

-# Multi-ref matching with mean similarity
+# Multi-ref matching with margin (recommended for reducing false positives)
 python test_logo_detection.py -n 20 --matching-method multi-ref \
-    --refs-per-logo 5 --min-matching-refs 2 --threshold 0.70
+    --refs-per-logo 5 --min-matching-refs 2 --threshold 0.70 --margin 0.05

 # Multi-ref matching with max similarity (more lenient)
 python test_logo_detection.py -n 20 --matching-method multi-ref \
-    --refs-per-logo 5 --min-matching-refs 1 --use-max-similarity
+    --refs-per-logo 5 --min-matching-refs 1 --use-max-similarity --margin 0.03

 # Reproducible test with seed
 python test_logo_detection.py -n 50 --seed 42 --clear-cache
--- a/run_comparison_tests.sh
+++ b/run_comparison_tests.sh
@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Run logo detection tests with all three matching configurations (margin, multi-ref mean, multi-ref max) and save results to comparison_results.txt.
+#
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUTPUT_FILE="${SCRIPT_DIR}/comparison_results.txt"
+
+# Common parameters
+NUM_LOGOS=20
+REFS_PER_LOGO=10
+POSITIVE_SAMPLES=20
+NEGATIVE_SAMPLES=100
+MIN_MATCHING_REFS=3
+
+# Use a fixed seed for reproducibility across methods
+SEED=42
+
+echo "Logo Detection Comparison Tests" > "$OUTPUT_FILE"
+echo "================================" >> "$OUTPUT_FILE"
+echo "Date: $(date)" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+echo "Running tests with:"
+echo "  Reference logos: $NUM_LOGOS"
+echo "  Refs per logo: $REFS_PER_LOGO"
+echo "  Positive samples: $POSITIVE_SAMPLES"
+echo "  Negative samples: $NEGATIVE_SAMPLES"
+echo "  Min matching refs: $MIN_MATCHING_REFS"
+echo "  Seed: $SEED"
+echo ""
+
+# Test 1: Margin-based matching
+echo "=== Test 1: Margin-based matching ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method margin \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo "" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+# Test 2: Multi-ref with mean similarity
+echo "=== Test 2: Multi-ref matching (mean similarity) ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method multi-ref \
+    --min-matching-refs $MIN_MATCHING_REFS \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo "" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+# Test 3: Multi-ref with max similarity
+echo "=== Test 3: Multi-ref matching (max similarity) ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method multi-ref \
+    --min-matching-refs $MIN_MATCHING_REFS \
+    --use-max-similarity \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo ""
+echo "Results saved to: $OUTPUT_FILE"
--- a/test_logo_detection.py
+++ b/test_logo_detection.py
@ -231,7 +231,7 @@ def main():
        "--margin",
        type=float,
        default=0.05,
-        help="Required margin between best and second-best match for 'margin' method (default: 0.05)",
+        help="Required margin between best and second-best match (applies to both methods) (default: 0.05)",
    )
    parser.add_argument(
        "--matching-method",
@ -453,6 +453,7 @@ def main():
                    similarity_threshold=args.threshold,
                    min_matching_refs=args.min_matching_refs,
                    use_mean_similarity=not args.use_max_similarity,
+                    margin=args.margin,
                )
                if match_result:
                    label, similarity, num_matching = match_result
@ -511,9 +512,8 @@ def main():
    print(f"  CLIP similarity threshold: {args.threshold}")
    print(f"  DETR confidence threshold: {args.detr_threshold}")
    print(f"  Matching method:           {args.matching_method}")
-    if args.matching_method == "margin":
-        print(f"  Matching margin:           {args.margin}")
-    else:  # multi-ref
+    print(f"  Matching margin:           {args.margin}")
+    if args.matching_method == "multi-ref":
        print(f"  Min matching refs:         {args.min_matching_refs}")
        print(f"  Similarity aggregation:    {'max' if args.use_max_similarity else 'mean'}")
    if args.seed is not None: