Add simple matching method as baseline for comparison tests

- Add find_all_matches() method to DetectLogosDETR that returns all logos above the similarity threshold without any rejection logic
- Add `--matching-method simple` option to the test script
- Update run_comparison_tests.sh to include simple matching as Test 1
- Update documentation to describe the simple matching method
2025-12-31 17:36:18 -05:00
parent 197e007591
commit 41bc0c701f
5 changed files with 174 additions and 40 deletions
--- a/logo_detection_detr.py
+++ b/logo_detection_detr.py
@ -394,6 +394,47 @@ class DetectLogosDETR:
        else:
            return None

+    def find_all_matches(
+        self,
+        detected_embedding: torch.Tensor,
+        reference_embeddings: List[Tuple[str, torch.Tensor]],
+        similarity_threshold: float = 0.7,
+    ) -> List[Tuple[str, float]]:
+        """
+        Collect every reference logo whose similarity reaches the threshold.
+
+        Each reference embedding is scored against the detected embedding via
+        compare_embeddings(). A label qualifies when at least one of its
+        references scores at or above similarity_threshold, and it is reported
+        once, paired with the highest qualifying score seen for it.
+
+        Args:
+            detected_embedding: Embedding of the detected logo region
+            reference_embeddings: (label, embedding) tuples to score against
+            similarity_threshold: Lowest score, inclusive, that counts as a match
+
+        Returns:
+            List of (label, similarity) tuples ordered by similarity, highest
+            first. Labels are unique; the list is empty when nothing qualifies.
+        """
+        if not reference_embeddings:
+            return []
+
+        # Highest qualifying score seen so far, keyed by logo label
+        top_score: Dict[str, float] = {}
+
+        for name, reference in reference_embeddings:
+            score = self.compare_embeddings(detected_embedding, reference)
+            if not score >= similarity_threshold:
+                continue  # below threshold: this reference is ignored
+
+            previous = top_score.get(name)
+            if previous is None or score > previous:
+                top_score[name] = score
+
+        # Stable sort: equally scored labels keep first-seen order
+        return sorted(top_score.items(), key=lambda item: item[1], reverse=True)
+
    def find_best_match_multi_ref(
        self,
        detected_embedding: torch.Tensor,