"""Test scripts and utilities for evaluating vision-language models on jersey number detection using a llama.cpp server."""
import base64
import json
import os
from typing import List, Dict, Any, Optional, Union

import cv2
import numpy as np
import requests
|
|
|
|
|
|
class LlamaCppClient:
    """A Python client for interacting with a llama.cpp server.

    Wraps the server's health and model-listing endpoints, the
    OpenAI-compatible chat completion endpoint, and the native
    ``/completion`` endpoint, plus helpers for building multimodal
    (text + image) chat messages from file paths or OpenCV images.
    """

    def __init__(self, base_url: str = "http://192.168.1.34:8080", timeout: float = 120.0):
        """
        Initialize the client with the base URL of the llama.cpp server.

        Args:
            base_url: The base URL of the llama.cpp server (default: http://192.168.1.34:8080)
            timeout: Per-request timeout in seconds applied to every HTTP
                call (default: 120.0). Without a timeout a hung server
                would block the caller indefinitely.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def health_check(self) -> Dict[str, Any]:
        """
        Check the health status of the server.

        Returns:
            Health status response from the server

        Raises:
            requests.HTTPError: If the server returns a non-2xx status.
        """
        response = requests.get(f"{self.base_url}/health", timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def get_models(self) -> Dict[str, Any]:
        """
        Get information about loaded models.

        Returns:
            Model information from the server

        Raises:
            requests.HTTPError: If the server returns a non-2xx status.
        """
        response = requests.get(f"{self.base_url}/v1/models", timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def chat_completion(
        self,
        messages: List[Dict[str, Any]],
        temperature: float = 0.1,
        min_p: float = 0.15,
        repetition_penalty: float = 1.05,
        min_image_tokens: int = 64,
        max_image_tokens: int = 256,
        do_image_splitting: bool = True,
        max_tokens: int = -1,
        stream: bool = False,
        **kwargs
    ) -> Union[Dict[str, Any], "requests.Response"]:
        """
        Generate a chat completion using the OpenAI-compatible API.

        Args:
            messages: List of message dictionaries with role and content
            temperature: Sampling temperature (default: 0.1)
            min_p: Minimum probability for sampling (default: 0.15)
            repetition_penalty: Repetition penalty factor (default: 1.05)
            min_image_tokens: Minimum image tokens (default: 64)
            max_image_tokens: Maximum image tokens (default: 256)
            do_image_splitting: Whether to split images (default: True)
            max_tokens: Maximum tokens to generate (default: -1 for infinity)
            stream: Whether to stream the response (default: False)
            **kwargs: Additional parameters for the completion

        Returns:
            Parsed JSON completion response, or the raw streaming
            ``requests.Response`` when ``stream=True``.

        Raises:
            requests.HTTPError: If the server returns a non-2xx status.
        """
        # NOTE(review): `completion()` sends this value as "repeat_penalty"
        # (llama.cpp's native name) while this endpoint sends
        # "repetition_penalty" — confirm against the server version that the
        # OAI-compatible endpoint recognizes this key.
        payload = {
            "messages": messages,
            "temperature": temperature,
            "min_p": min_p,
            "repetition_penalty": repetition_penalty,
            "min_image_tokens": min_image_tokens,
            "max_image_tokens": max_image_tokens,
            "do_image_splitting": do_image_splitting,
            "max_tokens": max_tokens,
            # Reuse the cached prompt prefix across calls to cut latency.
            "cache_prompt": True,
            "stream": stream,
            **kwargs
        }

        # Debug aid for llama-swap setups: show which model was requested
        # (only when a model was passed via kwargs and the env flag is set).
        if payload.get('model') and os.environ.get('DEBUG_LLAMA_SWAP'):
            print(f"[DEBUG] Requesting model: {payload['model']}")

        response = requests.post(
            f"{self.base_url}/v1/chat/completions",
            headers={"Content-Type": "application/json"},
            json=payload,
            stream=stream,
            timeout=self.timeout
        )
        response.raise_for_status()

        if stream:
            # Caller is responsible for iterating/closing the stream.
            return response

        return response.json()

    def completion(
        self,
        prompt: Union[str, List[Union[str, int]]],
        temperature: float = 0.1,
        min_p: float = 0.15,
        repetition_penalty: float = 1.05,
        min_image_tokens: int = 64,
        max_image_tokens: int = 256,
        do_image_splitting: bool = True,
        max_tokens: int = -1,
        stream: bool = False,
        **kwargs
    ) -> Union[Dict[str, Any], "requests.Response"]:
        """
        Generate a completion using the non-OAI compatible API.

        Args:
            prompt: The prompt string or list of tokens
            temperature: Sampling temperature (default: 0.1)
            min_p: Minimum probability for sampling (default: 0.15)
            repetition_penalty: Repetition penalty factor (default: 1.05)
            min_image_tokens: Minimum image tokens (default: 64)
            max_image_tokens: Maximum image tokens (default: 256)
            do_image_splitting: Whether to split images (default: True)
            max_tokens: Maximum tokens to generate (default: -1 for infinity)
            stream: Whether to stream the response (default: False)
            **kwargs: Additional parameters for the completion

        Returns:
            Parsed JSON completion response, or the raw streaming
            ``requests.Response`` when ``stream=True``.

        Raises:
            requests.HTTPError: If the server returns a non-2xx status.
        """
        payload = {
            "prompt": prompt,
            "temperature": temperature,
            "min_p": min_p,
            # Native endpoint uses llama.cpp parameter names
            # ("repeat_penalty" / "n_predict") rather than the OAI ones.
            "repeat_penalty": repetition_penalty,
            "min_image_tokens": min_image_tokens,
            "max_image_tokens": max_image_tokens,
            "do_image_splitting": do_image_splitting,
            "cache_prompt": True,
            "n_predict": max_tokens,
            "stream": stream,
            **kwargs
        }

        response = requests.post(
            f"{self.base_url}/completion",
            headers={"Content-Type": "application/json"},
            json=payload,
            stream=stream,
            timeout=self.timeout
        )
        response.raise_for_status()

        if stream:
            # Caller is responsible for iterating/closing the stream.
            return response

        return response.json()

    @staticmethod
    def _encode_image_to_base64(image_path: str) -> str:
        """
        Encode an image file to base64 string.

        Args:
            image_path: Path to the image file

        Returns:
            Base64 encoded image string

        Raises:
            OSError: If the file cannot be opened or read.
        """
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    @staticmethod
    def _encode_cv2_image_to_base64(image: np.ndarray) -> str:
        """
        Encode an OpenCV image to base64 string (JPEG-compressed).

        Args:
            image: OpenCV image (numpy array)

        Returns:
            Base64 encoded image string

        Raises:
            ValueError: If JPEG encoding fails (e.g. empty or invalid image);
                previously the failure flag was ignored and a garbage buffer
                could be encoded silently.
        """
        success, buffer = cv2.imencode('.jpg', image)
        if not success:
            raise ValueError("cv2.imencode failed to encode image as JPEG")
        return base64.b64encode(buffer).decode('utf-8')

    def create_multimodal_message(
        self,
        role: str,
        content: str,
        images: Optional[List[Union[str, np.ndarray]]] = None
    ) -> Dict[str, Any]:
        """
        Create a multimodal message with text and images.

        Args:
            role: Role of the message (system, user, assistant)
            content: Text content of the message
            images: List of image paths or OpenCV images (numpy arrays)

        Returns:
            Formatted message dictionary: plain ``{"role", "content"}`` when
            no images are given; otherwise ``content`` is a list of one text
            part followed by one ``image_url`` part per image (images inlined
            as ``data:image/jpeg;base64,...`` URLs).
        """
        if not images:
            # Text-only message: keep the simple string form the server expects.
            return {"role": role, "content": content}

        # Encode each image: strings are treated as file paths, everything
        # else as an in-memory OpenCV image.
        image_data = []
        for img in images:
            if isinstance(img, str):
                encoded_image = self._encode_image_to_base64(img)
            else:
                encoded_image = self._encode_cv2_image_to_base64(img)
            image_data.append(encoded_image)

        # Text part first, then one image_url part per image.
        multimodal_content = [
            {"type": "text", "text": content}
        ]

        for img_data in image_data:
            multimodal_content.append({
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{img_data}"
                }
            })

        return {"role": role, "content": multimodal_content}