Add CLIP fine-tuning pipeline for logo recognition
Implement contrastive learning with LoRA to fine-tune CLIP's vision encoder on the LogoDet-3K dataset for improved logo embedding similarity.

New training module (training/):
- config.py: TrainingConfig dataclass with all hyperparameters
- dataset.py: LogoContrastiveDataset with logo-level splits
- model.py: LogoFineTunedCLIP wrapper with LoRA support
- losses.py: InfoNCE, TripletLoss, SupConLoss implementations
- trainer.py: Training loop with mixed precision and checkpointing
- evaluation.py: EmbeddingEvaluator for validation metrics

New scripts:
- train_clip_logo.py: Main training entry point
- export_model.py: Export to HuggingFace-compatible format

Configurations:
- configs/jetson_orin.yaml: Optimized for Jetson Orin AGX
- configs/cloud_rtx4090.yaml: Optimized for 24GB cloud GPUs
- configs/cloud_a100.yaml: Optimized for 80GB cloud GPUs

Documentation:
- CLIP_FINETUNING.md: Training guide and usage instructions
- CLOUD_TRAINING.md: Cloud GPU recommendations and cost estimates

Modified:
- logo_detection_detr.py: Add fine-tuned model loading support
- pyproject.toml: Add peft, pyyaml, torchvision dependencies
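The LoRA knobs in the config below (lora_r, lora_alpha, lora_dropout) are consumed via the new peft dependency. The committed model.py (LogoFineTunedCLIP) is not shown in this excerpt, so the following is only a rough sketch of how such adapters typically attach to CLIP's vision encoder with peft; the target_modules choice and the wiring are assumptions, not the committed implementation:

    # Hypothetical sketch: attaching LoRA adapters to CLIP's vision encoder.
    # Not the committed model.py; target_modules is an assumed choice.
    from peft import LoraConfig, get_peft_model
    from transformers import CLIPVisionModel

    vision = CLIPVisionModel.from_pretrained("openai/clip-vit-large-patch14")
    lora_cfg = LoraConfig(
        r=16,            # corresponds to TrainingConfig.lora_r
        lora_alpha=32,   # corresponds to TrainingConfig.lora_alpha
        lora_dropout=0.1,
        target_modules=["q_proj", "v_proj"],  # attention projections (assumed)
    )
    vision = get_peft_model(vision, lora_cfg)
    vision.print_trainable_parameters()  # only the low-rank adapters are trainable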
training/config.py | +141 (new file)
@@ -0,0 +1,141 @@
"""
Training configuration for CLIP fine-tuning.
"""

from dataclasses import dataclass, field
from pathlib import Path
from typing import List, Optional

import yaml


@dataclass
class TrainingConfig:
    """Configuration for CLIP logo fine-tuning."""

    # Base model
    base_model: str = "openai/clip-vit-large-patch14"

    # Dataset paths
    dataset_dir: str = "LogoDet-3K"
    reference_dir: str = "reference_logos"
    db_path: str = "test_data_mapping.db"

    # Data split ratios
    train_split: float = 0.7
    val_split: float = 0.15
    test_split: float = 0.15

    # Batch construction
    batch_size: int = 16
    logos_per_batch: int = 32
    samples_per_logo: int = 4
    gradient_accumulation_steps: int = 8
    num_workers: int = 4

    # Model architecture
    lora_r: int = 16
    lora_alpha: int = 32
    lora_dropout: float = 0.1
    freeze_layers: int = 12
    use_gradient_checkpointing: bool = True

    # Training hyperparameters
    learning_rate: float = 1e-5
    weight_decay: float = 0.01
    warmup_steps: int = 500
    max_epochs: int = 20
    mixed_precision: bool = True

    # Loss function
    temperature: float = 0.07
    loss_type: str = "infonce"  # "infonce" or "triplet"
    triplet_margin: float = 0.3

    # Early stopping
    patience: int = 5
    min_delta: float = 0.001

    # Checkpoints and output
    checkpoint_dir: str = "checkpoints"
    output_dir: str = "models/logo_detection/clip_finetuned"
    save_every_n_epochs: int = 5

    # Logging
    log_every_n_steps: int = 10
    eval_every_n_epochs: int = 1

    # Random seed for reproducibility
    seed: int = 42

    # Hard negative mining
    use_hard_negatives: bool = False
    hard_negative_start_epoch: int = 5
    hard_negatives_per_logo: int = 10

    # Data augmentation
    use_augmentation: bool = True
    augmentation_strength: str = "medium"  # "light", "medium", "strong"

    @classmethod
    def from_yaml(cls, yaml_path: str) -> "TrainingConfig":
        """Load configuration from YAML file."""
        with open(yaml_path, "r") as f:
            config_dict = yaml.safe_load(f)
        return cls(**config_dict)

    def to_yaml(self, yaml_path: str) -> None:
        """Save configuration to YAML file."""
        Path(yaml_path).parent.mkdir(parents=True, exist_ok=True)
        with open(yaml_path, "w") as f:
            yaml.dump(self.__dict__, f, default_flow_style=False, sort_keys=False)

    def validate(self) -> List[str]:
        """Validate configuration and return list of warnings."""
        warnings = []

        # Check split ratios
        total_split = self.train_split + self.val_split + self.test_split
        if abs(total_split - 1.0) > 0.01:
            warnings.append(
                f"Split ratios sum to {total_split}, expected 1.0"
            )

        # Check batch construction
        effective_batch = self.batch_size * self.gradient_accumulation_steps
        if effective_batch < 64:
            warnings.append(
                f"Effective batch size ({effective_batch}) is small for contrastive learning. "
                "Consider increasing batch_size or gradient_accumulation_steps."
            )

        # Check LoRA config
        if self.lora_r > 0 and self.lora_alpha < self.lora_r:
            warnings.append(
                f"lora_alpha ({self.lora_alpha}) < lora_r ({self.lora_r}). "
                "This may reduce LoRA effectiveness."
            )

        # Check freeze layers
        if self.freeze_layers < 0:
            warnings.append("freeze_layers should be >= 0")

        # Check temperature
        if self.temperature <= 0:
            warnings.append("temperature must be positive")
        elif self.temperature > 1.0:
            warnings.append(
                f"temperature ({self.temperature}) is high. "
                "Typical values are 0.05-0.1."
            )

        return warnings

    @property
    def effective_batch_size(self) -> int:
        """Calculate effective batch size with gradient accumulation."""
        return self.batch_size * self.gradient_accumulation_steps

    @property
    def samples_per_batch(self) -> int:
        """Total samples in one batch (logos_per_batch * samples_per_logo)."""
        return self.logos_per_batch * self.samples_per_logo
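For reference, a quick usage sketch of the class above; the YAML and checkpoint paths here are illustrative, though configs/cloud_rtx4090.yaml is named in the commit message:

    # Usage sketch for TrainingConfig (paths are illustrative).
    config = TrainingConfig.from_yaml("configs/cloud_rtx4090.yaml")

    for warning in config.validate():
        print(f"[config] {warning}")

    # With the defaults above: 16 * 8 = 128
    print(config.effective_batch_size)

    # Snapshot the resolved config alongside checkpoints for reproducibility
    config.to_yaml("checkpoints/run_001/config.yaml")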
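The temperature and loss_type fields feed the contrastive objective in training/losses.py, which is not part of this diff. As a minimal sketch of what an InfoNCE loss over paired logo embeddings typically looks like (an illustration of the role of temperature, not the committed implementation):

    # Minimal InfoNCE sketch over paired embeddings; illustrates
    # TrainingConfig.temperature. Not the committed training/losses.py.
    import torch
    import torch.nn.functional as F

    def info_nce(anchors: torch.Tensor, positives: torch.Tensor,
                 temperature: float = 0.07) -> torch.Tensor:
        """anchors/positives: (N, D) embeddings; row i of each is the same logo."""
        anchors = F.normalize(anchors, dim=-1)
        positives = F.normalize(positives, dim=-1)
        # (N, N) cosine similarities, sharpened by the temperature
        logits = anchors @ positives.t() / temperature
        # Diagonal entries are the matching pairs (the positives)
        targets = torch.arange(anchors.size(0), device=anchors.device)
        return F.cross_entropy(logits, targets)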