Initial commit: Jersey detection test suite
Test scripts and utilities for evaluating vision-language models on jersey number detection using llama.cpp server.
This commit is contained in:
59
llama-swap-config.yaml
Normal file
59
llama-swap-config.yaml
Normal file
@ -0,0 +1,59 @@
# llama-swap configuration for jersey detection testing
# ==================================================
# This configuration allows automatic model switching for testing
# different vision language models with the jersey detection test script.
#
# Usage:
# llama-swap --config llama-swap-config.yaml --listen localhost:8080
#
# Then use the test script with --model-tag:
# python test_jersey_detection.py ./images jersey_prompt.txt --model-tag "lfm2-vl-1.6b"
#
# llama-swap will automatically load the requested model and swap models
# as needed when you run tests with different --model-tag values.

models:
  # Small vision models (1-4B parameters)
  lfm2-vl-1.6b:
    name: "LiquidAI LFM2-VL 1.6B (F16)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf LiquidAI/LFM2-VL-1.6B-GGUF:F16

  gemma-3-4b:
    name: "Gemma 3 4B Instruct (F16)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/gemma-3-4b-it-GGUF:F16

  kimi-vl-3b:
    name: "Kimi VL A3B Thinking (F16)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf ggml-org/Kimi-VL-A3B-Thinking-2506-GGUF:F16

  # Medium vision models (7-12B parameters)
  qwen2.5-vl-7b:
    name: "Qwen2.5-VL 7B Instruct (F16)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/Qwen2.5-VL-7B-Instruct-GGUF:F16

  gemma-3-12b:
    name: "Gemma 3 12B Instruct (F16)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/gemma-3-12b-it-GGUF:F16

  # Large models (24-27B parameters)
  mistral-small-24b-q8:
    name: "Mistral Small 3.2 24B Instruct (Q8_K_XL)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF:Q8_K_XL

  mistral-small-24b-q4:
    name: "Mistral Small 3.2 24B Instruct (Q4_K_XL)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/Mistral-Small-3.2-24B-Instruct-2506-GGUF:Q4_K_XL

  gemma-3-27b:
    name: "Gemma 3 27B Instruct (Q8_0)"
    cmd: llama-server --no-mmap -ngl 999 -fa on --host 0.0.0.0 --port ${PORT} -hf unsloth/gemma-3-27b-it-GGUF:Q8_0

# Optional: Automatically unload models after 5 minutes of inactivity
# Uncomment to enable
# ttl: 300

# Optional: Preload a specific model on startup
# Uncomment to enable
# hooks:
#   onStartup:
#     - loadModel: qwen2.5-vl-7b
Reference in New Issue
Block a user