Add embedding model selection and comparison test scripts

- Update DetectLogosDETR to support both CLIP and DINOv2 models
  - Rename clip_model parameter to embedding_model
  - Add model type detection for different embedding extraction
  - DINOv2 uses CLS token, CLIP uses get_image_features()
- Add -e/--embedding-model argument to test_logo_detection.py
- Include model name in file output header
- Add run_threshold_tests.sh for testing various threshold/margin values
- Add run_model_comparison.sh for comparing CLIP vs DINOv2 models
This commit is contained in:
Rick McEwen
2026-01-02 12:05:27 -05:00
parent a3008ee57f
commit 94db5bd40b
4 changed files with 312 additions and 30 deletions

View File

@@ -203,6 +203,13 @@ def main():
default=0.5,
help="DETR detection confidence threshold (default: 0.5)",
)
parser.add_argument(
"-e", "--embedding-model",
type=str,
default="openai/clip-vit-large-patch14",
help="Embedding model for feature extraction (default: openai/clip-vit-large-patch14). "
"Supports CLIP models (openai/clip-*) and DINOv2 models (facebook/dinov2-*)",
)
parser.add_argument(
"-s", "--seed",
type=int,
@@ -302,10 +309,11 @@ def main():
logger.info(f"Loaded {len(cache)} cached embeddings")
# Initialize detector
logger.info("Initializing logo detector...")
logger.info(f"Initializing logo detector with embedding model: {args.embedding_model}")
detector = DetectLogosDETR(
logger=logger,
detr_threshold=args.detr_threshold,
embedding_model=args.embedding_model,
)
# Load ground truth (both mappings)
@@ -633,18 +641,20 @@ def write_results_to_file(
lines = [
"=" * 70,
f"TEST: {args.matching_method.upper()} MATCHING",
f"Model: {args.embedding_model}",
f"Method: {method_desc}",
"=" * 70,
f"Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
"",
"Configuration:",
f" Embedding model: {args.embedding_model}",
f" Reference logos: {num_logos}",
f" Refs per logo: {args.refs_per_logo}",
f" Total reference embeddings:{total_refs}",
f" Positive samples/logo: {args.positive_samples}",
f" Negative samples/logo: {args.negative_samples}",
f" Test images processed: {num_test_images}",
f" CLIP threshold: {args.threshold}",
f" Similarity threshold: {args.threshold}",
f" DETR threshold: {args.detr_threshold}",
]