# Changelog:
#
# Image-level splits allow the model to see some images from each logo brand
# during training, unlike logo-level splits where test brands are completely
# unseen. This is less rigorous but more representative of real-world use.
#
# Changes:
# - Add configs/image_level_splits.yaml with gentler training settings:
#   - split_level: "image" for image-level splits
#   - temperature: 0.15 (softer contrastive learning)
#   - learning_rate: 5e-6 (slower learning)
#   - max_epochs: 30 (more epochs)
# - Update training/dataset.py:
#   - Add split_level parameter to LogoDataset
#   - Implement _split_images() for image-level splitting
#   - Update LogoContrastiveDataset to use split-specific image mappings
# - Update training/config.py:
#   - Add split_level field to TrainingConfig
# - Update train_clip_logo.py:
#   - Pass split_level to create_dataloaders
#
# Usage:
#   uv run python train_clip_logo.py --config configs/image_level_splits.yaml
---
# Training configuration with IMAGE-LEVEL splits
#
# Unlike logo-level splits where test logos are completely unseen brands,
# image-level splits allow the model to see some images from each brand
# during training. This is less rigorous but more representative of
# real-world use where you have reference images for logos you want to detect.
#
# Also uses gentler contrastive learning settings to prevent over-separation.
#
# Usage:
#   uv run python train_clip_logo.py --config configs/image_level_splits.yaml

# Base model
base_model: "openai/clip-vit-large-patch14"

# Dataset paths (relative to project root)
dataset_dir: "LogoDet-3K"
reference_dir: "reference_logos"
db_path: "test_data_mapping.db"

# Data split configuration
# split_level: "image" means images are split, not logo brands
# This allows test set to contain images from brands seen during training
split_level: "image"
train_split: 0.7
val_split: 0.15
test_split: 0.15

# Batch construction
batch_size: 16
logos_per_batch: 32
samples_per_logo: 4
gradient_accumulation_steps: 8
num_workers: 4

# Model architecture - same as before
lora_r: 16
lora_alpha: 32
lora_dropout: 0.1
freeze_layers: 12
use_gradient_checkpointing: true

# Training hyperparameters - GENTLER settings
learning_rate: 5.0e-6  # Reduced from 1e-5
weight_decay: 0.01
warmup_steps: 500
max_epochs: 30  # More epochs with slower learning
mixed_precision: true

# Loss function - HIGHER temperature for softer contrastive learning
temperature: 0.15  # Increased from 0.07
loss_type: "infonce"
triplet_margin: 0.2  # Reduced from 0.3

# Early stopping
patience: 7  # More patience with gentler learning
min_delta: 0.001

# Checkpoints and output
checkpoint_dir: "checkpoints_image_split"
output_dir: "models/logo_detection/clip_finetuned_image_split"
save_every_n_epochs: 5

# Logging
log_every_n_steps: 10
eval_every_n_epochs: 1

# Reproducibility
seed: 42

# Hard negative mining
use_hard_negatives: false
hard_negative_start_epoch: 10
hard_negatives_per_logo: 10

# Data augmentation
use_augmentation: true
augmentation_strength: "medium"