Implement contrastive learning with LoRA to fine-tune CLIP's vision encoder on the LogoDet-3K dataset for improved logo embedding similarity.

New training module (training/):
- config.py: TrainingConfig dataclass with all hyperparameters
- dataset.py: LogoContrastiveDataset with logo-level splits
- model.py: LogoFineTunedCLIP wrapper with LoRA support
- losses.py: InfoNCE, TripletLoss, SupConLoss implementations
- trainer.py: Training loop with mixed precision and checkpointing
- evaluation.py: EmbeddingEvaluator for validation metrics

New scripts:
- train_clip_logo.py: Main training entry point
- export_model.py: Export to HuggingFace-compatible format

Configurations:
- configs/jetson_orin.yaml: Optimized for Jetson Orin AGX
- configs/cloud_rtx4090.yaml: Optimized for 24GB cloud GPUs
- configs/cloud_a100.yaml: Optimized for 80GB cloud GPUs

Documentation:
- CLIP_FINETUNING.md: Training guide and usage instructions
- CLOUD_TRAINING.md: Cloud GPU recommendations and cost estimates

Modified:
- logo_detection_detr.py: Add fine-tuned model loading support
- pyproject.toml: Add peft, pyyaml, torchvision dependencies
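
The YAML configs map onto the `TrainingConfig` dataclass in `training/config.py`. A minimal sketch of that glue, assuming the dataclass fields mirror the YAML keys below (only a few fields shown):

```python
# Hypothetical loader sketch: maps a YAML config onto the TrainingConfig
# dataclass. Field names are assumed to mirror the YAML keys shown below.
from dataclasses import dataclass, fields

import yaml


@dataclass
class TrainingConfig:
    base_model: str = "openai/clip-vit-large-patch14"
    learning_rate: float = 1.0e-5
    lora_r: int = 16
    # ... remaining hyperparameters omitted for brevity


def load_config(path: str) -> TrainingConfig:
    with open(path) as f:
        raw = yaml.safe_load(f)
    # Ignore YAML keys the dataclass does not declare
    known = {f.name for f in fields(TrainingConfig)}
    return TrainingConfig(**{k: v for k, v in raw.items() if k in known})


config = load_config("configs/jetson_orin.yaml")
```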

configs/jetson_orin.yaml:

# Training configuration optimized for Jetson Orin AGX (~64GB shared memory)
#
# Usage:
#   uv run python train_clip_logo.py --config configs/jetson_orin.yaml

# Base model
base_model: "openai/clip-vit-large-patch14"

# Dataset paths (relative to project root)
dataset_dir: "LogoDet-3K"
reference_dir: "reference_logos"
db_path: "test_data_mapping.db"

# Data split ratios (logo-level split for generalization testing)
train_split: 0.7
val_split: 0.15
test_split: 0.15
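
Logo-level means the split partitions logo classes rather than images: every image of a held-out logo lands in val/test, so evaluation measures generalization to unseen logos. A sketch of that idea; `dataset.py`'s actual logic may differ:

```python
# Sketch of a logo-level split: partition logo classes, not individual
# images, so val/test logos are entirely unseen during training.
import random


def split_logos(logo_names: list[str], train: float = 0.7, val: float = 0.15,
                seed: int = 42) -> tuple[set[str], set[str], set[str]]:
    rng = random.Random(seed)
    shuffled = logo_names[:]
    rng.shuffle(shuffled)
    n_train = int(len(shuffled) * train)
    n_val = int(len(shuffled) * val)
    return (set(shuffled[:n_train]),
            set(shuffled[n_train:n_train + n_val]),
            set(shuffled[n_train + n_val:]))  # remainder is the test split
```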

# Batch construction
# - batch_size: Number of batches loaded at once (keep low for memory)
# - logos_per_batch: Different logo classes per contrastive batch
# - samples_per_logo: Samples of each logo (creates positive pairs)
# - Effective samples per step = logos_per_batch * samples_per_logo = 128
batch_size: 16
logos_per_batch: 32
samples_per_logo: 4
gradient_accumulation_steps: 8  # Effective batch = 128
num_workers: 4
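
Batches are built class-first: pick `logos_per_batch` logo classes, then `samples_per_logo` images of each, so all 128 samples in a contrastive batch are guaranteed positive pairs. A sketch of such a sampler (illustrative; not necessarily `dataset.py`'s implementation):

```python
# Sketch of a class-balanced batch sampler: 32 logos x 4 samples = 128
# images per contrastive batch, so every logo contributes positive pairs.
import random
from collections import defaultdict


def contrastive_batches(labels: list[str], logos_per_batch: int = 32,
                        samples_per_logo: int = 4, seed: int = 42):
    rng = random.Random(seed)
    by_logo = defaultdict(list)
    for idx, logo in enumerate(labels):
        by_logo[logo].append(idx)
    # Only logos with enough images can supply positive pairs
    eligible = [l for l, idxs in by_logo.items() if len(idxs) >= samples_per_logo]
    rng.shuffle(eligible)
    for i in range(0, len(eligible) - logos_per_batch + 1, logos_per_batch):
        batch = []
        for logo in eligible[i:i + logos_per_batch]:
            batch.extend(rng.sample(by_logo[logo], samples_per_logo))
        yield batch  # 128 dataset indices per batch
```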

# Model architecture
# LoRA enables memory-efficient fine-tuning by training low-rank adapters
# instead of full model weights
lora_r: 16                        # LoRA rank (0 to disable)
lora_alpha: 32                    # LoRA scaling factor
lora_dropout: 0.1                 # Dropout in LoRA layers
freeze_layers: 12                 # Freeze first 12 of 24 transformer layers
use_gradient_checkpointing: true  # Trade compute for memory
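
A sketch of how these settings might be applied with `peft`; the `q_proj`/`v_proj` module names follow HuggingFace's CLIP implementation, and `model.py`'s actual wiring (including `freeze_layers` handling) may differ:

```python
# Sketch: attach LoRA adapters to CLIP's vision tower with peft.
# Only the low-rank adapter matrices train; base weights stay frozen.
from peft import LoraConfig, get_peft_model
from transformers import CLIPModel

clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")

lora_config = LoraConfig(
    r=16,              # lora_r
    lora_alpha=32,     # adapter scaling = lora_alpha / r
    lora_dropout=0.1,
    # Regex restricting adapters to the vision tower's attention projections
    target_modules=r".*vision_model.*(q_proj|v_proj)",
)
model = get_peft_model(clip, lora_config)
model.print_trainable_parameters()  # prints the (small) trainable fraction
```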

# Training hyperparameters
learning_rate: 1.0e-5  # Conservative LR for fine-tuning
weight_decay: 0.01     # L2 regularization
warmup_steps: 500      # LR warmup steps
max_epochs: 20         # Maximum training epochs
mixed_precision: true  # FP16 training for memory efficiency
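
A sketch of how mixed precision and gradient accumulation might combine in `trainer.py`'s inner loop (function and argument names are illustrative):

```python
# Sketch of the inner loop: FP16 autocast with loss scaling, stepping the
# optimizer (and LR schedule) once per accumulation cycle of 8 micro-batches.
import torch


def train_epoch(model, loader, criterion, optimizer, scheduler,
                accum_steps: int = 8, device: str = "cuda"):
    scaler = torch.cuda.amp.GradScaler()
    optimizer.zero_grad()
    for step, (images, labels) in enumerate(loader):
        with torch.autocast(device, dtype=torch.float16):
            embeddings = model(images.to(device))
            loss = criterion(embeddings, labels.to(device)) / accum_steps
        scaler.scale(loss).backward()  # scale to avoid FP16 gradient underflow
        if (step + 1) % accum_steps == 0:
            scaler.step(optimizer)     # unscales grads, skips step on inf/nan
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()           # warmup advances per optimizer step
```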

# Loss function
# InfoNCE is the contrastive loss used in CLIP training
temperature: 0.07     # Similarity scaling (0.05-0.1 typical)
loss_type: "infonce"  # Options: infonce, supcon, triplet, combined
triplet_margin: 0.3   # Only used if loss_type is triplet
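
For reference, a minimal multi-positive InfoNCE over one contrastive batch, treating same-logo samples as positives; `losses.py` may differ in masking and reduction details:

```python
# Sketch of InfoNCE over L2-normalized embeddings: for each anchor, other
# samples of the same logo are positives, everything else is a negative.
import torch
import torch.nn.functional as F


def info_nce(embeddings: torch.Tensor, labels: torch.Tensor,
             temperature: float = 0.07) -> torch.Tensor:
    z = F.normalize(embeddings, dim=1)
    logits = z @ z.T / temperature                         # cosine similarities
    n = z.size(0)
    self_mask = torch.eye(n, dtype=torch.bool, device=z.device)
    logits = logits.masked_fill(self_mask, float("-inf"))  # drop self-pairs
    pos_mask = (labels.unsqueeze(0) == labels.unsqueeze(1)) & ~self_mask
    log_prob = logits - torch.logsumexp(logits, dim=1, keepdim=True)
    # Average log-likelihood of positives per anchor, then over the batch
    return -(log_prob * pos_mask).sum(1).div(pos_mask.sum(1).clamp(min=1)).mean()
```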

# Early stopping
patience: 5       # Stop if no improvement for N epochs
min_delta: 0.001  # Minimum improvement threshold
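
The patience logic amounts to roughly the following (illustrative sketch, not the `trainer.py` API):

```python
# Sketch of patience-based early stopping on a validation metric
# that is being maximized.
class EarlyStopping:
    def __init__(self, patience: int = 5, min_delta: float = 0.001):
        self.patience, self.min_delta = patience, min_delta
        self.best, self.bad_epochs = float("-inf"), 0

    def should_stop(self, metric: float) -> bool:
        if metric > self.best + self.min_delta:
            self.best, self.bad_epochs = metric, 0  # meaningful improvement
        else:
            self.bad_epochs += 1
        return self.bad_epochs >= self.patience
```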

# Checkpoints and output
checkpoint_dir: "checkpoints"
output_dir: "models/logo_detection/clip_finetuned"
save_every_n_epochs: 5

# Logging
log_every_n_steps: 10
eval_every_n_epochs: 1

# Reproducibility
seed: 42

# Hard negative mining (advanced)
# Enable after initial training epochs for harder examples
use_hard_negatives: false
hard_negative_start_epoch: 5
hard_negatives_per_logo: 10
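
Hard negatives are the most similar embeddings from *other* logos, i.e. the pairs the model currently confuses. A sketch of mining them from a precomputed embedding matrix (illustrative):

```python
# Sketch: for each anchor, pick the k most-similar embeddings that belong
# to a different logo -- the negatives the model currently confuses.
import torch
import torch.nn.functional as F


def mine_hard_negatives(embeddings: torch.Tensor, labels: torch.Tensor,
                        k: int = 10) -> torch.Tensor:
    z = F.normalize(embeddings, dim=1)
    sim = z @ z.T
    same = labels.unsqueeze(0) == labels.unsqueeze(1)
    sim = sim.masked_fill(same, float("-inf"))  # exclude self and positives
    return sim.topk(k, dim=1).indices           # (N, k) hard-negative indices
```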

# Data augmentation
use_augmentation: true
augmentation_strength: "medium"  # light, medium, or strong
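
The strength levels presumably map to increasingly aggressive torchvision transforms, e.g. (values are illustrative):

```python
# Sketch: map augmentation_strength to a torchvision pipeline. Values are
# illustrative; the real mapping lives in the training module.
from torchvision import transforms


def build_augmentation(strength: str = "medium") -> transforms.Compose:
    jitter = {"light": 0.1, "medium": 0.2, "strong": 0.4}[strength]
    degrees = {"light": 5, "medium": 10, "strong": 20}[strength]
    return transforms.Compose([
        transforms.RandomResizedCrop(224, scale=(0.7, 1.0)),
        transforms.ColorJitter(brightness=jitter, contrast=jitter,
                               saturation=jitter),
        transforms.RandomRotation(degrees),
        transforms.ToTensor(),
    ])
```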