---
# llama-swap configuration for jersey detection testing
# ======================================================
# This configuration allows automatic model switching for testing
# different vision language models with the jersey detection test script.
#
# Usage:
#   llama-swap --config llama-swap-config.yaml --listen localhost:8080
#
# Then use the test script with --model-tag:
#   python test_jersey_detection.py ./images jersey_prompt.txt --model-tag "lfm2-vl-1.6b"
#
# llama-swap will automatically load the requested model and swap models
# as needed when you run tests with different --model-tag values.
#
# Shared cmd flags: --no-mmap (load fully into RAM), -ngl 999 (offload all
# layers to GPU), -fa on (flash attention), ${PORT} is substituted by
# llama-swap at launch time.

models:
  # Small vision models (1-4B parameters)
  lfm2-vl-1.6b:
    name: "LiquidAI LFM2-VL 1.6B (F16)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf LiquidAI/LFM2-VL-1.6B-GGUF:F16

  gemma-3-4b:
    name: "Gemma 3 4B Instruct (F16)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/gemma-3-4b-it-GGUF:F16

  kimi-vl-3b:
    name: "Kimi VL A3B Thinking (F16)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:F16

  # Medium vision models (7-12B parameters)
  qwen2.5-vl-7b:
    name: "Qwen2.5-VL 7B Instruct (F16)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:F16

  gemma-3-12b:
    name: "Gemma 3 12B Instruct (F16)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/gemma-3-12b-it-GGUF:F16

  # Large models (24-27B parameters)
  mistral-small-24b-q8:
    name: "Mistral Small 3.2 24B Instruct (Q8_K_XL)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF:Q8_K_XL

  mistral-small-24b-q4:
    name: "Mistral Small 3.2 24B Instruct (Q4_K_XL)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF:Q4_K_XL

  gemma-3-27b:
    name: "Gemma 3 27B Instruct (Q8_0)"
    cmd: >-
      llama-server --no-mmap -ngl 999 -fa on
      --host 0.0.0.0 --port ${PORT}
      -hf unsloth/gemma-3-27b-it-GGUF:Q8_0

# Optional: Automatically unload models after 5 minutes of inactivity
# Uncomment to enable
# ttl: 300

# Optional: Preload a specific model on startup
# Uncomment to enable
# hooks:
#   onStartup:
#     - loadModel: qwen2.5-vl-7b