diff --git a/logo_detection_detr.py b/logo_detection_detr.py
index af8a9ac..fe94a30 100644
--- a/logo_detection_detr.py
+++ b/logo_detection_detr.py
@@ -401,6 +401,7 @@ class DetectLogosDETR:
         similarity_threshold: float = 0.85,
         min_matching_refs: int = 1,
         use_mean_similarity: bool = True,
+        margin: float = 0.0,
     ) -> Optional[Tuple[str, float, int]]:
         """
         Find the best matching reference logo using multiple reference embeddings per logo.
@@ -414,6 +415,7 @@ class DetectLogosDETR:
             similarity_threshold: Minimum similarity to consider a match (0-1)
             min_matching_refs: Minimum number of references that must match above threshold
             use_mean_similarity: If True, use mean similarity across all refs; if False, use max
+            margin: Minimum required gap between best and second-best logo scores (0-1); 0 disables the check
 
         Returns:
             Tuple of (label, similarity, num_matching_refs) for best match,
@@ -422,9 +424,8 @@ class DetectLogosDETR:
         if not reference_embeddings:
             return None
 
-        best_score = -1.0
-        best_label = None
-        best_num_matches = 0
+        # Calculate scores for all logos that meet the min_matching_refs requirement
+        logo_scores = []
 
         for label, ref_embedding_list in reference_embeddings.items():
             if not ref_embedding_list:
@@ -445,17 +446,30 @@ class DetectLogosDETR:
             else:
                 score = max(similarities)
 
-            # Check if this logo meets the minimum matching refs requirement
-            if num_matches >= min_matching_refs and score > best_score:
-                best_score = score
-                best_label = label
-                best_num_matches = num_matches
+            # Only consider logos that meet the minimum matching refs requirement
+            if num_matches >= min_matching_refs:
+                logo_scores.append((label, score, num_matches))
 
-        if best_label is not None and best_score >= similarity_threshold:
-            return (best_label, best_score, best_num_matches)
-        else:
+        if not logo_scores:
             return None
 
+        # Sort by score descending
+        logo_scores.sort(key=lambda x: x[1], reverse=True)
+
+        best_label, best_score, best_num_matches = logo_scores[0]
+
+        # Check if best score meets threshold
+        if best_score < similarity_threshold:
+            return None
+
+        # Check margin against second-best logo (if exists)
+        if margin > 0 and len(logo_scores) > 1:
+            second_best_score = logo_scores[1][1]
+            if best_score - second_best_score < margin:
+                return None  # Not confident enough
+
+        return (best_label, best_score, best_num_matches)
+
     def find_best_match_with_margin(
         self,
         detected_embedding: torch.Tensor,
diff --git a/logo_detection_detr_usage.md b/logo_detection_detr_usage.md
index da99362..e17e475 100644
--- a/logo_detection_detr_usage.md
+++ b/logo_detection_detr_usage.md
@@ -143,7 +143,8 @@ match = detector.find_best_match_multi_ref(
     reference_embeddings,       # Dict: logo_name -> list of embeddings
     similarity_threshold=0.85,
     min_matching_refs=1,
-    use_mean_similarity=True
+    use_mean_similarity=True,
+    margin=0.05
 )
 # Returns: (label, similarity, num_matching_refs) or None
 ```
@@ -154,9 +155,13 @@ match = detector.find_best_match_multi_ref(
 - `similarity_threshold`: Minimum similarity to consider a match (0-1, default: 0.85)
 - `min_matching_refs`: Minimum number of references that must match above threshold (default: 1)
 - `use_mean_similarity`: If True, use mean similarity; if False, use max (default: True)
+- `margin`: Minimum required difference between the best and second-best logo scores; 0 disables the check (default: 0.0)
 
 **Returns:**
-- Tuple of (label, similarity, num_matching_refs) for best match, or None if no match meets criteria
+- Tuple of (label, similarity, num_matching_refs) for best match, or None if:
+  - No logo meets the min_matching_refs requirement, OR
+  - Best score is below threshold, OR
+  - Best score doesn't beat the second-best score by at least the required margin (checked only when `margin` > 0 and at least two logos qualify)
 
 **Example:**
 ```python
@@ -171,7 +176,8 @@ match = detector.find_best_match_multi_ref(
     multi_ref_embeddings,
     similarity_threshold=0.80,
     min_matching_refs=2,        # At least 2 refs must match
-    use_mean_similarity=True    # Average across all refs
+    use_mean_similarity=True,   # Average across all refs
+    margin=0.05                 # Require 0.05 margin over second-best logo
 )
 
 if match:
diff --git a/logo_detection_test_methodology.md b/logo_detection_test_methodology.md
index dff29a5..7621b6f 100644
--- a/logo_detection_test_methodology.md
+++ b/logo_detection_test_methodology.md
@@ -40,12 +40,7 @@ The system uses a two-stage pipeline:
 | Parameter | Default | Description |
 |-----------|---------|-------------|
 | `--matching-method` | margin | Matching method: `margin` or `multi-ref` |
-
-#### Margin Method Parameters (when `--matching-method margin`)
-
-| Parameter | Default | Description |
-|-----------|---------|-------------|
-| `--margin` | 0.05 | Required margin between best and second-best match |
+| `--margin` | 0.05 | Required margin between best and second-best match (applies to both methods) |
 
 #### Multi-Ref Method Parameters (when `--matching-method multi-ref`)
 
@@ -275,8 +270,8 @@ Matched Logo Labels
 
 | Goal | Adjustments |
 |------|-------------|
-| **Reduce false positives** | Increase `--threshold`, increase `--min-matching-refs`, use mean similarity |
-| **Reduce false negatives** | Decrease `--threshold`, decrease `--min-matching-refs`, use `--use-max-similarity` |
+| **Reduce false positives** | Increase `--threshold`, increase `--margin`, increase `--min-matching-refs`, use mean similarity |
+| **Reduce false negatives** | Decrease `--threshold`, decrease `--margin`, decrease `--min-matching-refs`, use `--use-max-similarity` |
 
 ### General Tuning
 
@@ -295,13 +290,13 @@ Matched Logo Labels
 # Default margin-based matching
 python test_logo_detection.py -n 20 --threshold 0.75 --margin 0.05
 
-# Multi-ref matching with mean similarity
+# Multi-ref matching with margin (recommended for reducing false positives)
 python test_logo_detection.py -n 20 --matching-method multi-ref \
-    --refs-per-logo 5 --min-matching-refs 2 --threshold 0.70
+    --refs-per-logo 5 --min-matching-refs 2 --threshold 0.70 --margin 0.05
 
 # Multi-ref matching with max similarity (more lenient)
 python test_logo_detection.py -n 20 --matching-method multi-ref \
-    --refs-per-logo 5 --min-matching-refs 1 --use-max-similarity
+    --refs-per-logo 5 --min-matching-refs 1 --use-max-similarity --margin 0.03
 
 # Reproducible test with seed
 python test_logo_detection.py -n 50 --seed 42 --clear-cache
diff --git a/run_comparison_tests.sh b/run_comparison_tests.sh
new file mode 100755
index 0000000..ce4c1da
--- /dev/null
+++ b/run_comparison_tests.sh
@@ -0,0 +1,76 @@
+#!/bin/bash
+#
+# Run logo detection tests with three matching configurations (margin, multi-ref mean, multi-ref max) and save results.
+#
+
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+OUTPUT_FILE="${SCRIPT_DIR}/comparison_results.txt"
+
+# Common parameters
+NUM_LOGOS=20
+REFS_PER_LOGO=10
+POSITIVE_SAMPLES=20
+NEGATIVE_SAMPLES=100
+MIN_MATCHING_REFS=3
+
+# Use a fixed seed for reproducibility across methods
+SEED=42
+
+echo "Logo Detection Comparison Tests" > "$OUTPUT_FILE"
+echo "================================" >> "$OUTPUT_FILE"
+echo "Date: $(date)" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+echo "Running tests with:"
+echo "  Reference logos: $NUM_LOGOS"
+echo "  Refs per logo: $REFS_PER_LOGO"
+echo "  Positive samples: $POSITIVE_SAMPLES"
+echo "  Negative samples: $NEGATIVE_SAMPLES"
+echo "  Min matching refs: $MIN_MATCHING_REFS"
+echo "  Seed: $SEED"
+echo ""
+
+# Test 1: Margin-based matching
+echo "=== Test 1: Margin-based matching ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method margin \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo "" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+# Test 2: Multi-ref with mean similarity
+echo "=== Test 2: Multi-ref matching (mean similarity) ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method multi-ref \
+    --min-matching-refs $MIN_MATCHING_REFS \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo "" >> "$OUTPUT_FILE"
+echo "" >> "$OUTPUT_FILE"
+
+# Test 3: Multi-ref with max similarity
+echo "=== Test 3: Multi-ref matching (max similarity) ===" | tee -a "$OUTPUT_FILE"
+uv run python "$SCRIPT_DIR/test_logo_detection.py" \
+    --num-logos $NUM_LOGOS \
+    --refs-per-logo $REFS_PER_LOGO \
+    --positive-samples $POSITIVE_SAMPLES \
+    --negative-samples $NEGATIVE_SAMPLES \
+    --matching-method multi-ref \
+    --min-matching-refs $MIN_MATCHING_REFS \
+    --use-max-similarity \
+    --seed $SEED \
+    2>&1 | tee -a "$OUTPUT_FILE"
+
+echo ""
+echo "Results saved to: $OUTPUT_FILE"
\ No newline at end of file
diff --git a/test_logo_detection.py b/test_logo_detection.py
index 94d0581..65e4d60 100755
--- a/test_logo_detection.py
+++ b/test_logo_detection.py
@@ -231,7 +231,7 @@ def main():
         "--margin",
         type=float,
         default=0.05,
-        help="Required margin between best and second-best match for 'margin' method (default: 0.05)",
+        help="Required margin between best and second-best match (applies to both methods) (default: 0.05)",
     )
     parser.add_argument(
         "--matching-method",
@@ -453,6 +453,7 @@ def main():
                     similarity_threshold=args.threshold,
                     min_matching_refs=args.min_matching_refs,
                     use_mean_similarity=not args.use_max_similarity,
+                    margin=args.margin,
                 )
                 if match_result:
                     label, similarity, num_matching = match_result
@@ -511,9 +512,8 @@ def main():
     print(f"  CLIP similarity threshold: {args.threshold}")
     print(f"  DETR confidence threshold: {args.detr_threshold}")
     print(f"  Matching method:           {args.matching_method}")
-    if args.matching_method == "margin":
-        print(f"  Matching margin:           {args.margin}")
-    else:  # multi-ref
+    print(f"  Matching margin:           {args.margin}")
+    if args.matching_method == "multi-ref":
         print(f"  Min matching refs:         {args.min_matching_refs}")
         print(f"  Similarity aggregation:    {'max' if args.use_max_similarity else 'mean'}")
     if args.seed is not None: