diff --git a/configs/cloud_rtx4090_image_split.yaml b/configs/cloud_rtx4090_image_split.yaml
new file mode 100644
index 0000000..59c1ba5
--- /dev/null
+++ b/configs/cloud_rtx4090_image_split.yaml
@@ -0,0 +1,70 @@
+# Training configuration for RTX 4090 (24GB VRAM) with IMAGE-LEVEL splits
+#
+# Combines RTX 4090 hardware optimizations with image-level splitting and
+# gentler contrastive learning for better generalization.
+#
+# Usage:
+#   python train_clip_logo.py --config configs/cloud_rtx4090_image_split.yaml
+#
+# Estimated training time: 5-7 hours (more epochs than logo-level)
+# Estimated cost on RunPod: ~$4
+
+# Base model
+base_model: "openai/clip-vit-large-patch14"
+
+# Dataset paths
+dataset_dir: "LogoDet-3K"
+reference_dir: "reference_logos"
+db_path: "test_data_mapping.db"
+
+# Data split configuration - IMAGE LEVEL
+# Each logo brand will have images in all splits, allowing the model
+# to see some examples of each brand during training.
+split_level: "image"
+train_split: 0.7
+val_split: 0.15
+test_split: 0.15
+
+# Larger batches for faster training on 24GB VRAM
+batch_size: 32
+logos_per_batch: 32
+samples_per_logo: 4  # NOTE(review): 32 x 4 = 128 != batch_size (32); confirm
+gradient_accumulation_steps: 4  # Effective batch = batch_size * 4 = 128
+num_workers: 8
+
+# Model architecture
+lora_r: 16
+lora_alpha: 32
+lora_dropout: 0.1
+freeze_layers: 12
+use_gradient_checkpointing: true
+
+# Training - GENTLER settings for better generalization
+learning_rate: 5.0e-6  # Reduced from 1e-5
+weight_decay: 0.01
+warmup_steps: 500
+max_epochs: 30  # More epochs with slower learning
+mixed_precision: true
+
+# Loss - HIGHER temperature for softer contrastive learning
+temperature: 0.15  # Increased from 0.07
+loss_type: "infonce"
+triplet_margin: 0.2  # Reduced from 0.3; presumably unused with infonce (confirm)
+
+# Early stopping - more patience with gentler learning
+patience: 7
+min_delta: 0.001
+
+# Output - separate directory for image-split model
+checkpoint_dir: "checkpoints_image_split"
+output_dir: "models/logo_detection/clip_finetuned_image_split"
+save_every_n_epochs: 2  # Save frequently for cloud
+
+# Logging
+log_every_n_steps: 10
+eval_every_n_epochs: 1
+
+seed: 42
+use_hard_negatives: false
+use_augmentation: true
+augmentation_strength: "medium"