Compare commits


2 Commits

ea6fcec9ce Remove hybrid text+CLIP matching approach
The hybrid approach combined OCR text recognition with CLIP embeddings
to improve logo matching accuracy. After extensive testing, the approach
was abandoned because:

1. OCR quality on small logo crops is unreliable
2. Text filtering rejected correct matches as often as wrong ones
3. Best hybrid result (57.1% precision) was similar to baseline (55.1%)
4. Recall dropped significantly (52.6% vs 59.6%)
5. Added complexity (EasyOCR dependency, extra parameters) wasn't justified

Removed:
- Hybrid matching methods from DetectLogosDETR class
- Text extraction and similarity methods
- Hybrid test scripts and text_recognition.py module
- Hybrid-related CLI arguments from test_logo_detection.py

The baseline multi-ref matching with 0.70 threshold remains the
recommended approach for logo detection.
2026-01-08 12:48:39 -05:00
f777b049a3 Fix EasyOCR model path to use script-relative directory 2026-01-07 15:38:23 -05:00
3 changed files with 7 additions and 594 deletions
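Note: the precision and recall figures quoted in the commit message above follow the standard definitions — precision = TP / (TP + FP), recall = TP / (TP + FN) — where the TP/FP/FN counts come from the test harness's true_positives, false_positives, and false_negatives counters (visible in the test_logo_detection.py diff below).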

View File

@@ -23,7 +23,6 @@ import cv2
 import numpy as np
 from pathlib import Path
 from typing import List, Tuple, Dict, Optional, Any
-from difflib import SequenceMatcher


 class DetectLogosDETR:
@@ -765,311 +764,4 @@ class DetectLogosDETR:
             f"(threshold: {similarity_threshold})"
         )
         return matched_detections
-    # =========================================================================
-    # Hybrid Text + CLIP Matching
-    # =========================================================================
-
-    def set_text_detector(self, text_detector) -> None:
-        """
-        Set an optional text detector for hybrid matching.
-
-        Args:
-            text_detector: Instance of DetectText class from text_recognition.py
-        """
-        self.text_detector = text_detector
-        self.logger.info("Text detector enabled for hybrid matching")
-
-    def extract_text(self, image: np.ndarray, min_confidence: float = 0.3) -> List[str]:
-        """
-        Extract text from an image using the text detector.
-
-        Args:
-            image: OpenCV image (BGR format)
-            min_confidence: Minimum OCR confidence to accept text
-
-        Returns:
-            List of detected text strings (lowercased, stripped)
-        """
-        if not hasattr(self, 'text_detector') or self.text_detector is None:
-            return []
-        try:
-            results, _ = self.text_detector.detect(image)
-            # Filter by confidence and normalize text
-            texts = []
-            for text, confidence in results:
-                if confidence >= min_confidence:
-                    # Normalize: lowercase and strip whitespace
-                    normalized = text.lower().strip()
-                    if len(normalized) >= 2:  # Ignore single characters
-                        texts.append(normalized)
-            return texts
-        except Exception as e:
-            self.logger.warning(f"Text extraction failed: {e}")
-            return []
-
-    def extract_text_pil(self, pil_image: Image.Image, min_confidence: float = 0.3) -> List[str]:
-        """
-        Extract text from a PIL image.
-
-        Args:
-            pil_image: PIL Image (RGB format)
-            min_confidence: Minimum OCR confidence
-
-        Returns:
-            List of detected text strings
-        """
-        # Convert PIL to OpenCV format
-        cv_image = cv2.cvtColor(np.array(pil_image), cv2.COLOR_RGB2BGR)
-        return self.extract_text(cv_image, min_confidence)
-
-    @staticmethod
-    def compute_text_similarity(text1_list: List[str], text2_list: List[str]) -> float:
-        """
-        Compute fuzzy text similarity between two lists of text strings.
-
-        Uses a combination of exact matches and fuzzy matching to handle
-        OCR variations like case differences, spacing, and minor errors.
-
-        Args:
-            text1_list: List of text strings from first image
-            text2_list: List of text strings from second image
-
-        Returns:
-            Similarity score between 0 and 1
-        """
-        if not text1_list or not text2_list:
-            return 0.0
-
-        # Combine all text into single strings for overall comparison
-        text1_combined = " ".join(sorted(text1_list))
-        text2_combined = " ".join(sorted(text2_list))
-
-        # Method 1: Sequence matching on combined text
-        seq_similarity = SequenceMatcher(None, text1_combined, text2_combined).ratio()
-
-        # Method 2: Token overlap (Jaccard-like)
-        # Split into tokens
-        tokens1 = set(text1_combined.split())
-        tokens2 = set(text2_combined.split())
-        if tokens1 and tokens2:
-            intersection = len(tokens1 & tokens2)
-            union = len(tokens1 | tokens2)
-            token_similarity = intersection / union if union > 0 else 0
-        else:
-            token_similarity = 0
-
-        # Method 3: Best pairwise match for each text in list1
-        pairwise_scores = []
-        for t1 in text1_list:
-            best_match = 0
-            for t2 in text2_list:
-                score = SequenceMatcher(None, t1, t2).ratio()
-                best_match = max(best_match, score)
-            pairwise_scores.append(best_match)
-        pairwise_similarity = sum(pairwise_scores) / len(pairwise_scores) if pairwise_scores else 0
-
-        # Combine methods (weighted average)
-        combined = (seq_similarity * 0.3 + token_similarity * 0.3 + pairwise_similarity * 0.4)
-        return combined
-
-    @staticmethod
-    def texts_match(
-        ref_texts: List[str],
-        det_texts: List[str],
-        threshold: float = 0.5
-    ) -> Tuple[bool, float]:
-        """
-        Determine if texts match above a threshold.
-
-        Args:
-            ref_texts: Text from reference logo
-            det_texts: Text from detected region
-            threshold: Minimum similarity to consider a match
-
-        Returns:
-            Tuple of (is_match, similarity_score)
-        """
-        if not ref_texts:
-            # Reference has no text - can't match on text
-            return (False, 0.0)
-        if not det_texts:
-            # Reference has text but detection doesn't - no text match
-            return (False, 0.0)
-        similarity = DetectLogosDETR.compute_text_similarity(ref_texts, det_texts)
-        return (similarity >= threshold, similarity)
-
-    def find_best_match_hybrid(
-        self,
-        detected_embedding: torch.Tensor,
-        detected_image: np.ndarray,
-        reference_data: Dict[str, Dict[str, Any]],
-        clip_threshold: float = 0.70,
-        clip_threshold_with_text: float = 0.60,
-        clip_threshold_text_mismatch: float = 0.80,
-        text_similarity_threshold: float = 0.5,
-        margin: float = 0.05,
-        use_mean_similarity: bool = False,
-    ) -> Optional[Tuple[str, float, Dict[str, Any]]]:
-        """
-        Find best match using hybrid text + CLIP approach.
-
-        Strategy:
-        - If reference has text AND detection has matching text:
-          → Use lower CLIP threshold (text provides additional confidence)
-        - If reference has text but detection doesn't match:
-          → Use higher CLIP threshold (need more visual confidence)
-        - If reference has no text:
-          → Use standard CLIP threshold
-
-        Args:
-            detected_embedding: CLIP embedding from detected logo region
-            detected_image: OpenCV image of the detected region (for text extraction)
-            reference_data: Dict mapping logo name to:
-                {
-                    'embeddings': List[torch.Tensor],  # CLIP embeddings
-                    'texts': List[str],  # Extracted text from reference
-                }
-            clip_threshold: Standard CLIP threshold for no-text references
-            clip_threshold_with_text: Lower threshold when text matches
-            clip_threshold_text_mismatch: Higher threshold when text expected but missing
-            text_similarity_threshold: Threshold for text matching
-            margin: Required margin between best and second-best
-            use_mean_similarity: Use mean vs max for multi-ref aggregation
-
-        Returns:
-            Tuple of (label, clip_similarity, match_info) or None.
-            match_info contains: text_matched, text_similarity, threshold_used
-        """
-        if not reference_data:
-            return None
-
-        # Extract text from detected region
-        detected_texts = self.extract_text(detected_image)
-
-        # Calculate scores for all logos
-        logo_scores = []
-        for label, ref_info in reference_data.items():
-            ref_embeddings = ref_info.get('embeddings', [])
-            ref_texts = ref_info.get('texts', [])
-            if not ref_embeddings:
-                continue
-
-            # Calculate CLIP similarity
-            similarities = []
-            for ref_emb in ref_embeddings:
-                sim = self.compare_embeddings(detected_embedding, ref_emb)
-                similarities.append(sim)
-            if use_mean_similarity:
-                clip_score = sum(similarities) / len(similarities)
-            else:
-                clip_score = max(similarities)
-
-            # Determine text match status and appropriate threshold
-            has_ref_text = len(ref_texts) > 0
-            text_matched, text_sim = self.texts_match(
-                ref_texts, detected_texts, text_similarity_threshold
-            )
-
-            if has_ref_text:
-                if text_matched:
-                    # Text matches - use lower threshold, boost confidence
-                    threshold_used = clip_threshold_with_text
-                    match_type = "text_match"
-                else:
-                    # Reference has text but detection doesn't match
-                    # Require higher CLIP threshold
-                    threshold_used = clip_threshold_text_mismatch
-                    match_type = "text_mismatch"
-            else:
-                # No text in reference - standard matching
-                threshold_used = clip_threshold
-                match_type = "no_text"
-                text_sim = 0.0
-
-            # Check if CLIP score meets the appropriate threshold
-            if clip_score >= threshold_used:
-                logo_scores.append({
-                    'label': label,
-                    'clip_score': clip_score,
-                    'text_matched': text_matched,
-                    'text_similarity': text_sim,
-                    'threshold_used': threshold_used,
-                    'match_type': match_type,
-                    'has_ref_text': has_ref_text,
-                })
-
-        if not logo_scores:
-            return None
-
-        # Sort by CLIP score descending
-        logo_scores.sort(key=lambda x: x['clip_score'], reverse=True)
-        best = logo_scores[0]
-
-        # Check margin against second-best
-        if margin > 0 and len(logo_scores) > 1:
-            second_best_score = logo_scores[1]['clip_score']
-            if best['clip_score'] - second_best_score < margin:
-                return None
-
-        match_info = {
-            'text_matched': best['text_matched'],
-            'text_similarity': best['text_similarity'],
-            'threshold_used': best['threshold_used'],
-            'match_type': best['match_type'],
-            'has_ref_text': best['has_ref_text'],
-            'detected_texts': detected_texts,
-        }
-        return (best['label'], best['clip_score'], match_info)
-
-    def prepare_reference_data_hybrid(
-        self,
-        reference_images: Dict[str, List[np.ndarray]],
-        text_min_confidence: float = 0.3,
-    ) -> Dict[str, Dict[str, Any]]:
-        """
-        Prepare reference data for hybrid matching by computing embeddings and extracting text.
-
-        Args:
-            reference_images: Dict mapping logo name to list of reference images (OpenCV BGR)
-            text_min_confidence: Minimum confidence for text extraction
-
-        Returns:
-            Dict mapping logo name to {'embeddings': [...], 'texts': [...]}
-        """
-        reference_data = {}
-        for logo_name, images in reference_images.items():
-            embeddings = []
-            all_texts = set()
-            for img in images:
-                # Compute CLIP embedding
-                emb = self.get_embedding(img)
-                embeddings.append(emb)
-                # Extract text
-                texts = self.extract_text(img, text_min_confidence)
-                all_texts.update(texts)
-            reference_data[logo_name] = {
-                'embeddings': embeddings,
-                'texts': list(all_texts),
-            }
-            if all_texts:
-                self.logger.debug(f"Reference '{logo_name}' has text: {all_texts}")
-        return reference_data

View File

@@ -1,168 +0,0 @@
-#!/bin/bash
-#
-# Test the hybrid text+CLIP matching approach for logo detection.
-#
-# This approach uses text recognition to improve logo matching:
-# - If reference logo has text and detection matches it: use lower CLIP threshold
-# - If reference logo has text but detection doesn't match: use higher CLIP threshold
-# - If reference logo has no text: use standard CLIP threshold
-#
-# Usage:
-#   ./run_hybrid_test.sh
-#
-
-SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
-OUTPUT_FILE="${SCRIPT_DIR}/test_results/hybrid_matching_results.txt"
-
-# Model - baseline CLIP
-MODEL="openai/clip-vit-large-patch14"
-
-# Fixed parameters
-NUM_LOGOS=20
-REFS_PER_LOGO=10
-POSITIVE_SAMPLES=20
-NEGATIVE_SAMPLES=100
-SEED=42
-
-# Create output directory if needed
-mkdir -p "${SCRIPT_DIR}/test_results"
-
-# Clear output file and write header
-cat > "$OUTPUT_FILE" << EOF
-Hybrid Text+CLIP Matching Test Results
-======================================
-Date: $(date)
-Model: ${MODEL}
-
-Fixed Parameters:
-  Number of logo brands: ${NUM_LOGOS}
-  Refs per logo: ${REFS_PER_LOGO}
-  Positive samples/logo: ${POSITIVE_SAMPLES}
-  Negative samples/logo: ${NEGATIVE_SAMPLES}
-  Seed: ${SEED}
-EOF
-
-echo "Hybrid Text+CLIP Matching Test"
-echo "==============================="
-echo "Model: ${MODEL}"
-echo ""
-
-# Test 1: Compare hybrid vs multi-ref baseline
-echo "=== Test 1: Multi-ref baseline (for comparison) ==="
-echo "" >> "$OUTPUT_FILE"
-echo "=== BASELINE: Multi-ref (max) at threshold 0.70 ===" >> "$OUTPUT_FILE"
-uv run python "$SCRIPT_DIR/test_logo_detection.py" \
-    --num-logos $NUM_LOGOS \
-    --refs-per-logo $REFS_PER_LOGO \
-    --positive-samples $POSITIVE_SAMPLES \
-    --negative-samples $NEGATIVE_SAMPLES \
-    --matching-method multi-ref \
-    --min-matching-refs 1 \
-    --use-max-similarity \
-    --threshold 0.70 \
-    --margin 0.05 \
-    --seed $SEED \
-    --embedding-model "$MODEL" \
-    --output-file "$OUTPUT_FILE" \
-    --no-cache
-echo ""
-
-# Test 2: Hybrid with default thresholds
-echo "=== Test 2: Hybrid with default thresholds ==="
-echo "" >> "$OUTPUT_FILE"
-echo "=== HYBRID: default thresholds (0.70/0.60/0.80) ===" >> "$OUTPUT_FILE"
-uv run python "$SCRIPT_DIR/test_logo_detection.py" \
-    --num-logos $NUM_LOGOS \
-    --refs-per-logo $REFS_PER_LOGO \
-    --positive-samples $POSITIVE_SAMPLES \
-    --negative-samples $NEGATIVE_SAMPLES \
-    --matching-method hybrid \
-    --threshold 0.70 \
-    --hybrid-text-threshold 0.60 \
-    --hybrid-no-text-threshold 0.80 \
-    --text-similarity-threshold 0.5 \
-    --margin 0.05 \
-    --seed $SEED \
-    --embedding-model "$MODEL" \
-    --output-file "$OUTPUT_FILE" \
-    --no-cache
-echo ""
-
-# Test 3: Hybrid with more aggressive text bonus
-echo "=== Test 3: Hybrid with lower text-match threshold ==="
-echo "" >> "$OUTPUT_FILE"
-echo "=== HYBRID: aggressive text bonus (0.70/0.55/0.80) ===" >> "$OUTPUT_FILE"
-uv run python "$SCRIPT_DIR/test_logo_detection.py" \
-    --num-logos $NUM_LOGOS \
-    --refs-per-logo $REFS_PER_LOGO \
-    --positive-samples $POSITIVE_SAMPLES \
-    --negative-samples $NEGATIVE_SAMPLES \
-    --matching-method hybrid \
-    --threshold 0.70 \
-    --hybrid-text-threshold 0.55 \
-    --hybrid-no-text-threshold 0.80 \
-    --text-similarity-threshold 0.5 \
-    --margin 0.05 \
-    --seed $SEED \
-    --embedding-model "$MODEL" \
-    --output-file "$OUTPUT_FILE" \
-    --no-cache
-echo ""
-
-# Test 4: Hybrid with stricter text mismatch penalty
-echo "=== Test 4: Hybrid with stricter text mismatch penalty ==="
-echo "" >> "$OUTPUT_FILE"
-echo "=== HYBRID: strict mismatch (0.70/0.60/0.85) ===" >> "$OUTPUT_FILE"
-uv run python "$SCRIPT_DIR/test_logo_detection.py" \
-    --num-logos $NUM_LOGOS \
-    --refs-per-logo $REFS_PER_LOGO \
-    --positive-samples $POSITIVE_SAMPLES \
-    --negative-samples $NEGATIVE_SAMPLES \
-    --matching-method hybrid \
-    --threshold 0.70 \
-    --hybrid-text-threshold 0.60 \
-    --hybrid-no-text-threshold 0.85 \
-    --text-similarity-threshold 0.5 \
-    --margin 0.05 \
-    --seed $SEED \
-    --embedding-model "$MODEL" \
-    --output-file "$OUTPUT_FILE" \
-    --no-cache
-echo ""
-
-# Test 5: Hybrid with lower text similarity threshold (more lenient OCR matching)
-echo "=== Test 5: Hybrid with lenient text matching ==="
-echo "" >> "$OUTPUT_FILE"
-echo "=== HYBRID: lenient text matching (text_sim=0.4) ===" >> "$OUTPUT_FILE"
-uv run python "$SCRIPT_DIR/test_logo_detection.py" \
-    --num-logos $NUM_LOGOS \
-    --refs-per-logo $REFS_PER_LOGO \
-    --positive-samples $POSITIVE_SAMPLES \
-    --negative-samples $NEGATIVE_SAMPLES \
-    --matching-method hybrid \
-    --threshold 0.70 \
-    --hybrid-text-threshold 0.60 \
-    --hybrid-no-text-threshold 0.80 \
-    --text-similarity-threshold 0.4 \
-    --margin 0.05 \
-    --seed $SEED \
-    --embedding-model "$MODEL" \
-    --output-file "$OUTPUT_FILE" \
-    --no-cache
-echo ""
-
-echo "======================================="
-echo "Tests complete!"
-echo "Results saved to: $OUTPUT_FILE"
-echo "======================================="

View File

@@ -243,12 +243,11 @@ def main():
     parser.add_argument(
         "--matching-method",
         type=str,
-        choices=["simple", "margin", "multi-ref", "hybrid"],
+        choices=["simple", "margin", "multi-ref"],
         default="margin",
         help="Matching method: 'simple' returns all matches above threshold, "
         "'margin' requires confidence margin over 2nd best, "
-        "'multi-ref' aggregates scores across reference images, "
-        "'hybrid' combines text recognition with CLIP (default: margin)",
+        "'multi-ref' aggregates scores across reference images (default: margin)",
     )
     parser.add_argument(
         "--min-matching-refs",
@@ -261,25 +260,6 @@ def main():
         action="store_true",
         help="For 'multi-ref' method: use max similarity instead of mean across references",
     )
-    # Hybrid method arguments
-    parser.add_argument(
-        "--hybrid-text-threshold",
-        type=float,
-        default=0.60,
-        help="For 'hybrid' method: CLIP threshold when text matches (default: 0.60)",
-    )
-    parser.add_argument(
-        "--hybrid-no-text-threshold",
-        type=float,
-        default=0.80,
-        help="For 'hybrid' method: CLIP threshold when text expected but not found (default: 0.80)",
-    )
-    parser.add_argument(
-        "--text-similarity-threshold",
-        type=float,
-        default=0.5,
-        help="For 'hybrid' method: minimum text similarity to consider a match (default: 0.5)",
-    )
     parser.add_argument(
         "-v", "--verbose",
         action="store_true",
@@ -352,14 +332,6 @@ def main():
         preprocess_mode=args.preprocess_mode,
     )

-    # Initialize text detector for hybrid method
-    text_detector = None
-    if args.matching_method == "hybrid":
-        logger.info("Initializing text detector for hybrid matching...")
-        from text_recognition import DetectText
-        text_detector = DetectText(logger=logger, threshold=0.3)
-        detector.set_text_detector(text_detector)
-
     # Load ground truth (both mappings)
     logger.info("Loading ground truth from database...")
     image_to_logos, logo_to_images = get_ground_truth(db_path)
@@ -377,15 +349,10 @@ def main():
     multi_ref_embeddings: Dict[str, List[torch.Tensor]] = {}
     # List for margin-based matching: (logo_name, embedding) tuples
     reference_embeddings: List[Tuple[str, torch.Tensor]] = []
-    # Dict for hybrid matching: logo_name -> {'embeddings': [...], 'texts': [...]}
-    hybrid_reference_data: Dict[str, Dict[str, Any]] = {}

     total_refs = 0
-    logos_with_text = 0

     for logo_name, ref_filenames in tqdm(sampled_logos.items(), desc="Reference logos"):
         multi_ref_embeddings[logo_name] = []
-        if args.matching_method == "hybrid":
-            hybrid_reference_data[logo_name] = {'embeddings': [], 'texts': set()}
         for ref_filename in ref_filenames:
             ref_path = reference_dir / ref_filename
@@ -398,15 +365,12 @@ def main():
             cache_key = f"ref:{ref_filename}"
             embedding = cache.get(cache_key) if cache else None

-            # Load image if needed (for embedding or text extraction)
-            img = None
-            if embedding is None or args.matching_method == "hybrid":
+            # Load image if needed for embedding
+            if embedding is None:
                 img = load_image(ref_path)
                 if img is None:
                     logger.warning(f"Failed to load reference logo: {ref_path}")
                     continue
-
-            if embedding is None:
                 embedding = detector.get_embedding(img)
                 if cache:
                     cache.put(cache_key, embedding)
@@ -415,21 +379,7 @@ def main():
             reference_embeddings.append((logo_name, embedding))
             total_refs += 1

-            # Extract text for hybrid method
-            if args.matching_method == "hybrid" and img is not None:
-                hybrid_reference_data[logo_name]['embeddings'].append(embedding)
-                texts = detector.extract_text(img, min_confidence=0.3)
-                hybrid_reference_data[logo_name]['texts'].update(texts)
-
-        # Convert text set to list for hybrid data
-        if args.matching_method == "hybrid":
-            hybrid_reference_data[logo_name]['texts'] = list(hybrid_reference_data[logo_name]['texts'])
-            if hybrid_reference_data[logo_name]['texts']:
-                logos_with_text += 1
-
     logger.info(f"Computed {total_refs} embeddings for {len(sampled_logos)} logos")
-    if args.matching_method == "hybrid":
-        logger.info(f"Extracted text from {logos_with_text}/{len(sampled_logos)} reference logos")

     # Build test set: for each logo, sample positive and negative images
     logger.info(f"Sampling test images: {args.positive_samples} positive, {args.negative_samples} negative per logo...")
@@ -504,14 +454,7 @@ def main():
         cache_key = f"det:{test_filename}"
         cached_detections = cache.get(cache_key) if cache else None

-        # For hybrid matching, we always need the original image for text extraction
         test_img = None
-        if args.matching_method == "hybrid":
-            test_img = load_image(test_path)
-            if test_img is None:
-                logger.warning(f"Failed to load test image: {test_path}")
-                continue
-
         if cached_detections is not None:
             # Cached detections contain serialized box data and embeddings
             detections = cached_detections
@@ -651,50 +594,6 @@ def main():
                         "correct": is_correct,
                     })
-                else:  # hybrid
-                    # Hybrid matching: combines text recognition with CLIP
-                    # Extract crop from original image for text extraction
-                    box = detection["box"]
-                    crop = test_img[
-                        int(box["ymin"]):int(box["ymax"]),
-                        int(box["xmin"]):int(box["xmax"])
-                    ]
-
-                    match_result = detector.find_best_match_hybrid(
-                        detected_embedding=detection["embedding"],
-                        detected_image=crop,
-                        reference_data=hybrid_reference_data,
-                        clip_threshold=args.threshold,
-                        clip_threshold_with_text=args.hybrid_text_threshold,
-                        clip_threshold_text_mismatch=args.hybrid_no_text_threshold,
-                        text_similarity_threshold=args.text_similarity_threshold,
-                        margin=args.margin,
-                        use_mean_similarity=not args.use_max_similarity,
-                    )
-
-                    if match_result:
-                        label, similarity, match_info = match_result
-                        matched_logos.add(label)
-                        is_correct = label in expected_logos
-
-                        if is_correct:
-                            true_positives += 1
-                            if args.similarity_details:
-                                similarity_details["true_positive_sims"].append(similarity)
-                        else:
-                            false_positives += 1
-                            if args.similarity_details:
-                                similarity_details["false_positive_sims"].append(similarity)
-
-                        results.append({
-                            "test_image": test_filename,
-                            "matched_logo": label,
-                            "similarity": similarity,
-                            "correct": is_correct,
-                            "text_matched": match_info.get("text_matched", False),
-                            "text_similarity": match_info.get("text_similarity", 0),
-                            "match_type": match_info.get("match_type", "unknown"),
-                        })

         # Count missed detections (false negatives)
         missed = expected_logos - matched_logos
         false_negatives += len(missed)
@@ -742,16 +641,11 @@ def main():
     print(f"  DETR confidence threshold: {args.detr_threshold}")
     print(f"  Preprocess mode: {args.preprocess_mode}")
     print(f"  Matching method: {args.matching_method}")
-    if args.matching_method in ("margin", "multi-ref", "hybrid"):
+    if args.matching_method in ("margin", "multi-ref"):
         print(f"  Matching margin: {args.margin}")
     if args.matching_method == "multi-ref":
         print(f"  Min matching refs: {args.min_matching_refs}")
         print(f"  Similarity aggregation: {'max' if args.use_max_similarity else 'mean'}")
-    if args.matching_method == "hybrid":
-        print(f"  CLIP threshold (text match): {args.hybrid_text_threshold}")
-        print(f"  CLIP threshold (no text): {args.hybrid_no_text_threshold}")
-        print(f"  Text similarity threshold: {args.text_similarity_threshold}")
-        print(f"  Refs with text: {logos_with_text}/{len(sampled_logos)}")

     if args.seed is not None:
         print(f"  Random seed: {args.seed}")
@@ -939,14 +833,9 @@
         method_desc = "Simple (all matches above threshold)"
     elif args.matching_method == "margin":
         method_desc = f"Margin-based (margin={args.margin})"
-    elif args.matching_method == "multi-ref":
+    else:  # multi-ref
         agg = "max" if args.use_max_similarity else "mean"
         method_desc = f"Multi-ref ({agg}, min_refs={args.min_matching_refs}, margin={args.margin})"
-    else:  # hybrid
-        method_desc = (
-            f"Hybrid (text+CLIP, text_thresh={args.hybrid_text_threshold}, "
-            f"no_text_thresh={args.hybrid_no_text_threshold}, margin={args.margin})"
-        )

     lines = [
         "=" * 70,