---
# Training configuration optimized for Jetson Orin AGX (~64GB shared memory)
#
# Usage:
#   uv run python train_clip_logo.py --config configs/jetson_orin.yaml

# Base model
base_model: "openai/clip-vit-large-patch14"

# Dataset paths (relative to project root)
dataset_dir: "LogoDet-3K"
reference_dir: "reference_logos"
db_path: "test_data_mapping.db"

# Data split ratios (logo-level split for generalization testing)
# The three ratios sum to 1.0.
train_split: 0.7
val_split: 0.15
test_split: 0.15

# Batch construction
# - batch_size: Number of batches loaded at once (keep low for memory)
# - logos_per_batch: Different logo classes per contrastive batch
# - samples_per_logo: Samples of each logo (creates positive pairs)
# - Effective samples per step = logos_per_batch * samples_per_logo = 128
# NOTE(review): both 32 * 4 and 16 * 8 equal 128 here; confirm which product
# the trainer treats as the "effective batch" before changing either value.
batch_size: 16
logos_per_batch: 32
samples_per_logo: 4
gradient_accumulation_steps: 8  # Effective batch = 128
num_workers: 4

# Model architecture
# LoRA enables memory-efficient fine-tuning by training low-rank adapters
# instead of full model weights
lora_r: 16  # LoRA rank (0 to disable)
lora_alpha: 32  # LoRA scaling factor
lora_dropout: 0.1  # Dropout in LoRA layers
freeze_layers: 12  # Freeze first 12 of 24 transformer layers
use_gradient_checkpointing: true  # Trade compute for memory

# Training hyperparameters
# learning_rate keeps the explicit mantissa dot and exponent sign so that
# YAML 1.1 loaders also resolve it as a float rather than a string.
learning_rate: 1.0e-5  # Conservative LR for fine-tuning
weight_decay: 0.01  # L2 regularization
warmup_steps: 500  # LR warmup steps
max_epochs: 20  # Maximum training epochs
mixed_precision: true  # FP16 training for memory efficiency

# Loss function
# InfoNCE is the contrastive loss used in CLIP training
temperature: 0.07  # Similarity scaling (0.05-0.1 typical)
loss_type: "infonce"  # Options: infonce, supcon, triplet, combined
triplet_margin: 0.3  # Only used if loss_type is triplet

# Early stopping
patience: 5  # Stop if no improvement for N epochs
min_delta: 0.001  # Minimum improvement threshold

# Checkpoints and output
checkpoint_dir: "checkpoints"
output_dir: "models/logo_detection/clip_finetuned"
save_every_n_epochs: 5

# Logging
log_every_n_steps: 10
eval_every_n_epochs: 1

# Reproducibility
seed: 42

# Hard negative mining (advanced)
# Enable after initial training epochs for harder examples
use_hard_negatives: false
hard_negative_start_epoch: 5
hard_negatives_per_logo: 10

# Data augmentation
use_augmentation: true
augmentation_strength: "medium"  # light, medium, or strong