From 435033ea074fe00e24fd2b64e9a5b8967739bbbe Mon Sep 17 00:00:00 2001 From: Rick McEwen Date: Tue, 24 Feb 2026 11:30:41 -0700 Subject: [PATCH] Add color variety and hex specificity test scripts with report - test_color_variety.py: named-color test for local llama.cpp VLM - test_color_variety_gemini.py: named-color test for Gemini 3 Flash API - test_hex_color_specificity.py: hex color specificity test for Gemini - test_hex_color_specificity_llama.py: hex color specificity test for local VLM - jersey_prompt_hex_color.txt: prompt requesting hex color codes - COLOR_TEST_REPORT.md: analysis report comparing 3 models across 5 tests - color_test_results.md: raw test output from all runs --- COLOR_TEST_REPORT.md | 205 ++++++++++++++++ color_test_results.md | 299 +++++++++++++++++++++++ jersey_prompt_hex_color.txt | 50 ++++ test_color_variety.py | 151 ++++++++++++ test_color_variety_gemini.py | 270 +++++++++++++++++++++ test_hex_color_specificity.py | 355 ++++++++++++++++++++++++++++ test_hex_color_specificity_llama.py | 316 +++++++++++++++++++++++++ 7 files changed, 1646 insertions(+) create mode 100644 COLOR_TEST_REPORT.md create mode 100644 color_test_results.md create mode 100644 jersey_prompt_hex_color.txt create mode 100644 test_color_variety.py create mode 100644 test_color_variety_gemini.py create mode 100644 test_hex_color_specificity.py create mode 100644 test_hex_color_specificity_llama.py diff --git a/COLOR_TEST_REPORT.md b/COLOR_TEST_REPORT.md new file mode 100644 index 0000000..cac081f --- /dev/null +++ b/COLOR_TEST_REPORT.md @@ -0,0 +1,205 @@ +# Jersey Color Detection - VLM Comparison Report + +**Date:** 2026-02-24 +**Test set:** 161 basketball images (`basketball_jersery_color_test_files/`) + +## Overview + +Five tests were run to evaluate how vision-language models describe jersey colors: + +| Test | Model | Images | Prompt | Purpose | +|------|-------|--------|--------|---------| +| 1 | Qwen2.5-VL-7B (local, llama.cpp) | 161 | Named colors | Baseline 
color vocabulary | +| 2 | Gemini 3 Flash (cloud API) | 161 | Named colors | Cloud model color vocabulary | +| 3 | Qwen3-VL-8B (local, llama.cpp) | 161 | Named colors | Newer local model color vocabulary | +| 4 | Gemini 3 Flash (cloud API) | 20 (random, seed=42) | Hex codes (jersey only) | Hex color specificity | +| 5 | Qwen3-VL-8B (local, llama.cpp) | 20 (random, seed=42) | Hex codes (jersey only) | Hex color specificity | + +--- + +## Named Color Vocabulary (Tests 1-3) + +### Detection Volume + +| Metric | Qwen2.5-VL-7B | Gemini 3 Flash | Qwen3-VL-8B | +|--------|---------------|----------------|--------------| +| Jerseys detected | 369 | 453 | 444 | +| Errors | 0 | 0 | 1 | +| Avg time/image | 14.9s | 15.9s | 17.0s | +| Unique jersey colors | 15 | 19 | 15 | +| Unique number colors | 11 | 15 | 13 | +| Combined palette size | 15 | 19 | 17 | + +Gemini detected the most jerseys (453) and used the broadest color vocabulary (19 terms). Qwen3-VL-8B detected nearly as many jerseys (444) as Gemini but with a vocabulary closer to the older Qwen2.5 model. 
+ +### Jersey Color Distribution + +| Color | Qwen2.5-VL-7B | Gemini 3 Flash | Qwen3-VL-8B | Notes | +|-------|---------------|----------------|--------------|-------| +| white | 84 (22.8%) | 125 (27.6%) | 120 (27.0%) | Top color for all three | +| blue | 60 (16.3%) | 43 (9.5%) | 69 (15.5%) | Both Qwen models lump blues | +| green | 48 (13.0%) | 60 (13.2%) | 53 (11.9%) | Consistent across models | +| black | 31 (8.4%) | 21 (4.6%) | 33 (7.4%) | | +| purple | 25 (6.8%) | 28 (6.2%) | 30 (6.8%) | Consistent | +| red | 27 (7.3%) | 22 (4.9%) | 28 (6.3%) | | +| orange | 24 (6.5%) | 27 (6.0%) | 27 (6.1%) | Very consistent | +| yellow | 27 (7.3%) | 24 (5.3%) | 26 (5.9%) | | +| maroon | 14 (3.8%) | 23 (5.1%) | 15 (3.4%) | Gemini uses maroon more | +| light blue | 6 (1.6%) | 22 (4.9%) | 13 (2.9%) | Gemini distinguishes light blue most | +| gray/grey | 9 (2.4%) | 12 (2.6%) | 10 (2.3%) | | +| brown | 6 (1.6%) | 13 (2.9%) | 9 (2.0%) | | +| teal | 4 (1.1%) | 7 (1.5%) | 7 (1.6%) | | +| pink | 2 (0.5%) | 2 (0.4%) | 2 (0.5%) | | +| gold | 2 (0.5%) | 2 (0.4%) | 2 (0.5%) | | +| navy blue | -- | 11 (2.4%) | -- | Gemini-only | +| dark blue | -- | 9 (2.0%) | -- | Gemini-only | +| dark brown | -- | 1 (0.2%) | -- | Gemini-only | +| navy | -- | 1 (0.2%) | -- | Gemini-only | + +### Number Color Distribution + +| Color | Qwen2.5-VL-7B | Gemini 3 Flash | Qwen3-VL-8B | +|-------|---------------|----------------|--------------| +| white | 195 (52.8%) | 183 (40.4%) | 184 (41.4%) | +| black | 60 (16.3%) | 40 (8.8%) | 44 (9.9%) | +| yellow | 39 (10.6%) | 58 (12.8%) | 32 (7.2%) | +| red | 30 (8.1%) | 44 (9.7%) | 41 (9.2%) | +| blue | 23 (6.2%) | 39 (8.6%) | 39 (8.8%) | +| orange | 8 (2.2%) | 21 (4.6%) | 29 (6.5%) | +| gold | -- | 5 (1.1%) | 21 (4.7%) | +| dark blue | -- | 14 (3.1%) | 9 (2.0%) | +| maroon | 2 (0.5%) | 14 (3.1%) | 12 (2.7%) | +| green | 3 (0.8%) | 13 (2.9%) | 14 (3.2%) | +| purple | 4 (1.1%) | 11 (2.4%) | 11 (2.5%) | +| pink | 3 (0.8%) | 6 (1.3%) | 6 (1.4%) | +| brown | 2 (0.5%) | 2 
(0.4%) | -- | +| grey | -- | 2 (0.4%) | -- | +| navy blue | -- | 1 (0.2%) | -- | +| silver | -- | -- | 2 (0.5%) | + +### Key Differences in Named Color Mode + +1. **Gemini has the richest vocabulary.** It uses 19 distinct jersey color terms vs 15 for both Qwen models. The extras are all blue-shade variants (navy blue, dark blue, navy) and dark brown. + +2. **Both Qwen models lump blues together.** Qwen2.5-VL-7B reports 60 "blue" jerseys, Qwen3-VL-8B reports 69. Gemini splits these into blue (43), light blue (22), navy blue (11), dark blue (9), and navy (1) — totaling 86 blue-family detections with much finer granularity. + +3. **Qwen3-VL-8B is a modest upgrade over Qwen2.5-VL-7B.** It detects 20% more jerseys (444 vs 369) and uses the same 15 jersey color terms but with a slightly more balanced distribution. Its number color palette differs slightly from Qwen2.5's: it adds "gold", "dark blue", and "silver" and no longer reports "brown" (13 terms vs 11). + +4. **Gemini detects the most jerseys overall.** 453 vs 444 (Qwen3) vs 369 (Qwen2.5). The two newer models are close, while Qwen2.5 lags behind. + +5. **All three models are dominated by basic colors.** White, blue/green, and black account for the majority of detections. None spontaneously uses precise shade names like "crimson", "cobalt", or "forest green". + +6. **Qwen3-VL-8B favors "gold" for number colors.** It reported gold 21 times for number colors vs Gemini's 5 and Qwen2.5's 0. This may reflect team-specific coloring (e.g., Lakers gold numbers). + +--- + +## Hex Color Specificity (Tests 4-5) + +Both tests used the same 20 random images (seed=42) and evaluated **jersey colors only** (number colors excluded since they are usually primary colors like white or black). 
+ +### Summary + +| Metric | Gemini 3 Flash | Qwen3-VL-8B | +|--------|----------------|--------------| +| Images tested | 20 | 20 | +| Total jerseys | 56 | 59 | +| Jersey color values | 56 | 59 | +| Valid hex codes | 56/56 (100%) | 59/59 (100%) | +| Unique hex values | 24 | 21 | +| Specific (distinct shade) | 40 (71.4%) | 37 (62.7%) | +| Generic (near primary) | 16 (28.6%) | 22 (37.3%) | + +### Distance from Nearest Primary Color + +| Stat | Gemini 3 Flash | Qwen3-VL-8B | +|------|----------------|--------------| +| Min | 0.0 | 0.0 | +| Avg | 44.5 | 34.5 | +| Max | 111.0 | 110.7 | + +(Scale: 0 = exact primary match. 20 = generic threshold. Higher = more specific.) + +### Gemini 3 Flash - Unique Hex Values (24) + +| Hex | RGB | Count | Classification | +|-----|-----|-------|---------------| +| `#004B23` | (0, 75, 35) | x7 | specific, near green (dark), d=63.5 | +| `#1A2344` | (26, 35, 68) | x2 | specific, near navy, d=74.2 | +| `#1E4BA1` | (30, 75, 161) | x1 | specific, near navy, d=87.3 | +| `#2B231D` | (43, 35, 29) | x1 | specific, near black, d=62.6 | +| `#3D2B1F` | (61, 43, 31) | x1 | specific, near black, d=80.8 | +| `#461D7C` | (70, 29, 124) | x1 | specific, near purple, d=65.0 | +| `#4B2E83` | (75, 46, 131) | x5 | specific, near purple, d=70.2 | +| `#701112` | (112, 17, 18) | x1 | specific, near maroon, d=29.5 | +| `#7BAFD4` | (123, 175, 212) | x3 | specific, near silver, d=73.8 | +| `#990000` | (153, 0, 0) | x2 | specific, near maroon, d=25.0 | +| `#A9A9A9` | (169, 169, 169) | x1 | specific, near silver, d=39.8 | +| `#C41230` | (196, 18, 48) | x1 | specific, near brown, d=39.7 | +| `#D11111` | (209, 17, 17) | x2 | specific, near red, d=51.9 | +| `#D32F2F` | (211, 47, 47) | x2 | specific, near brown, d=46.5 | +| `#E31837` | (227, 24, 55) | x1 | specific, near brown, d=65.9 | +| `#E31B23` | (227, 27, 35) | x1 | specific, near red, d=52.3 | +| `#E3242B` | (227, 36, 43) | x2 | specific, near brown, d=62.3 | +| `#E6E600` | (230, 230, 0) | x1 | specific, near 
gold, d=29.2 | +| `#E8E8E8` | (232, 232, 232) | x1 | specific, near white, d=39.8 | +| `#E91E63` | (233, 30, 99) | x1 | specific, near brown, d=89.5 | +| `#F06292` | (240, 98, 146) | x2 | specific, near pink, d=111.0 | +| `#F57C00` | (245, 124, 0) | x1 | specific, near orange, d=42.2 | +| `#FFCD00` | (255, 205, 0) | x1 | GENERIC, near gold, d=10.0 | +| `#FFFFFF` | (255, 255, 255) | x15 | GENERIC, near white, d=0.0 | + +### Qwen3-VL-8B - Unique Hex Values (21) + +| Hex | RGB | Count | Classification | +|-----|-----|-------|---------------| +| `#000000` | (0, 0, 0) | x1 | GENERIC, near black, d=0.0 | +| `#006400` | (0, 100, 0) | x10 | specific, near green (dark), d=28.0 | +| `#191970` | (25, 25, 112) | x1 | specific, near navy, d=38.8 | +| `#19418A` | (25, 65, 138) | x1 | specific, near navy, d=70.4 | +| `#3D2B21` | (61, 43, 33) | x2 | specific, near black, d=81.6 | +| `#66B2FF` | (102, 178, 255) | x3 | specific, near silver, d=110.7 | +| `#6A0DAD` | (106, 13, 173) | x6 | specific, near purple, d=51.7 | +| `#8B0000` | (139, 0, 0) | x1 | GENERIC, near maroon, d=11.0 | +| `#A9A9A9` | (169, 169, 169) | x1 | specific, near silver, d=39.8 | +| `#B22234` | (178, 34, 52) | x2 | GENERIC, near brown, d=18.2 | +| `#D32F2F` | (211, 47, 47) | x3 | specific, near brown, d=46.5 | +| `#D60000` | (214, 0, 0) | x3 | specific, near red, d=41.0 | +| `#DC143C` | (220, 20, 60) | x2 | specific, near brown, d=61.9 | +| `#F5F5DC` | (245, 245, 220) | x2 | specific, near white, d=37.7 | +| `#F5F5F5` | (245, 245, 245) | x1 | GENERIC, near white, d=17.3 | +| `#FF0000` | (255, 0, 0) | x1 | GENERIC, near red, d=0.0 | +| `#FF6347` | (255, 99, 71) | x1 | specific, near orange, d=96.9 | +| `#FF69B4` | (255, 105, 180) | x2 | specific, near pink, d=90.0 | +| `#FFD700` | (255, 215, 0) | x1 | GENERIC, near gold, d=0.0 | +| `#FFFF00` | (255, 255, 0) | x1 | GENERIC, near yellow, d=0.0 | +| `#FFFFFF` | (255, 255, 255) | x14 | GENERIC, near white, d=0.0 | + +### Notable Findings + +- **Both models can 
produce valid hex codes.** 100% of returned values were valid hex in both cases. + +- **Gemini is more specific overall.** 71.4% of its jersey hex codes were distinct shades vs 62.7% for Qwen3. Gemini also produced more unique hex values (24 vs 21) and had a higher average distance from primaries (44.5 vs 34.5). + +- **Gemini uses more varied shades of each color family.** For red-family jerseys, Gemini returned 8 distinct hex values (`#701112`, `#990000`, `#C41230`, `#D11111`, `#D32F2F`, `#E31837`, `#E31B23`, `#E3242B`). Qwen3 returned 6 (`#8B0000`, `#B22234`, `#D32F2F`, `#D60000`, `#DC143C`, `#FF0000`), three of which were classified as generic (`#8B0000`, `#B22234`, `#FF0000`), including one exact primary (`#FF0000`). + +- **Qwen3 reuses hex values more heavily.** `#006400` (dark green) appeared 10 times and `#FFFFFF` 14 times — two values account for 41% of all results. Gemini's most repeated value was `#FFFFFF` at 15 times (27%), with better spread across other shades. + +- **White dominates both models.** `#FFFFFF` was the single most common value for both (Gemini: x15, Qwen3: x14), which is expected given white jerseys are the most common in basketball. + +- **Both models share some exact hex codes.** `#FFFFFF` (white), `#A9A9A9` (dark silver/gray), and `#D32F2F` (medium red) appeared in both models' outputs, and their dark-brown estimates were nearly identical (`#3D2B1F` for Gemini vs `#3D2B21` for Qwen3), suggesting some convergence on certain color estimations. + +--- + +## Conclusions + +1. **For basic color categorization, all three models work.** If you only need to distinguish "white vs dark vs colored" jerseys, any will do. Gemini offers slightly finer granularity with its blue-shade vocabulary (navy blue, dark blue, navy). + +2. **Gemini detects the most jerseys per image** (2.81 avg), followed closely by Qwen3-VL-8B (2.76 avg), with Qwen2.5-VL-7B trailing (2.29 avg). + +3. **Qwen3-VL-8B is a solid upgrade over Qwen2.5-VL-7B** for detection volume (+20% more jerseys) while maintaining the same color vocabulary. It runs locally without cloud API costs, making it a good default choice. + +4. 
**Hex color prompting works for jersey body colors.** Both models return specific hex shades the majority of the time (Gemini 71%, Qwen3 63%). Gemini produces more varied and specific shades, while Qwen3 tends to reuse a smaller set of hex values. + +5. **Neither model is a reliable colorimeter.** The hex values should be treated as rough shade estimates, not pixel-accurate measurements. For precise color matching, traditional computer vision (e.g., sampling pixels from the detected jersey region) would be more reliable. + +6. **Recommendation:** Use named-color prompts for general jersey classification. Reserve hex-color prompts for use cases where distinguishing similar shades matters (e.g., telling apart two teams that both wear "blue"). Gemini gives the best hex specificity but requires a cloud API; Qwen3-VL-8B is a capable local alternative. \ No newline at end of file diff --git a/color_test_results.md b/color_test_results.md new file mode 100644 index 0000000..e7b68cf --- /dev/null +++ b/color_test_results.md @@ -0,0 +1,299 @@ +#Qwen2.5-VL-7B Model Results: + +====================================================================== +COLOR VARIETY SUMMARY +====================================================================== +Images processed: 161 +Total jerseys detected: 369 +Errors: 0 +Total time: 2397.7s (14.9s avg) + +--- Jersey Colors (15 unique) --- + white 84 ################################################## + blue 60 ################################################## + green 48 ################################################ + black 31 ############################### + yellow 27 ########################### + red 27 ########################### + purple 25 ######################### + orange 24 ######################## + maroon 14 ############## + gray 9 ######### + light blue 6 ###### + brown 6 ###### + teal 4 #### + pink 2 ## + gold 2 ## + +--- Number Colors (11 unique) --- + white 195 ################################################## + black 60 
################################################## + yellow 39 ####################################### + red 30 ############################## + blue 23 ####################### + orange 8 ######## + purple 4 #### + pink 3 ### + green 3 ### + brown 2 ## + maroon 2 ## + +--- Combined Color Palette (15 unique values) --- + black jersey: 31 number: 60 + blue jersey: 60 number: 23 + brown jersey: 6 number: 2 + gold jersey: 2 number: 0 + gray jersey: 9 number: 0 + green jersey: 48 number: 3 + light blue jersey: 6 number: 0 + maroon jersey: 14 number: 2 + orange jersey: 24 number: 8 + pink jersey: 2 number: 3 + purple jersey: 25 number: 4 + red jersey: 27 number: 30 + teal jersey: 4 number: 0 + white jersey: 84 number:195 + yellow jersey: 27 number: 39 + + +#Gemini 3 Flash Results: + +====================================================================== +COLOR VARIETY SUMMARY (gemini-3-flash-preview) +====================================================================== +Images processed: 161 +Total jerseys detected: 453 +Errors: 0 +Total time: 2560.0s (15.9s avg) + +--- Jersey Colors (19 unique) --- + white 125 ################################################## + green 60 ################################################## + blue 43 ########################################### + purple 28 ############################ + orange 27 ########################### + yellow 24 ######################## + maroon 23 ####################### + light blue 22 ###################### + red 22 ###################### + black 21 ##################### + brown 13 ############# + grey 12 ############ + navy blue 11 ########### + dark blue 9 ######### + teal 7 ####### + pink 2 ## + gold 2 ## + dark brown 1 # + navy 1 # + +--- Number Colors (15 unique) --- + white 183 ################################################## + yellow 58 ################################################## + red 44 ############################################ + black 40 ######################################## + blue 39 
####################################### + orange 21 ##################### + dark blue 14 ############## + maroon 14 ############## + green 13 ############# + purple 11 ########### + pink 6 ###### + gold 5 ##### + brown 2 ## + grey 2 ## + navy blue 1 # + +--- Combined Color Palette (19 unique values) --- + black jersey: 21 number: 40 + blue jersey: 43 number: 39 + brown jersey: 13 number: 2 + dark blue jersey: 9 number: 14 + dark brown jersey: 1 number: 0 + gold jersey: 2 number: 5 + green jersey: 60 number: 13 + grey jersey: 12 number: 2 + light blue jersey: 22 number: 0 + maroon jersey: 23 number: 14 + navy jersey: 1 number: 0 + navy blue jersey: 11 number: 1 + orange jersey: 27 number: 21 + pink jersey: 2 number: 6 + purple jersey: 28 number: 11 + red jersey: 22 number: 44 + teal jersey: 7 number: 0 + white jersey:125 number:183 + yellow jersey: 24 number: 58 + + +#Qwen3-VL-8B Model Results: + +====================================================================== +COLOR VARIETY SUMMARY +====================================================================== +Images processed: 161 +Total jerseys detected: 444 +Errors: 1 +Total time: 2738.7s (17.0s avg) + +--- Jersey Colors (15 unique) --- + white 120 ################################################## + blue 69 ################################################## + green 53 ################################################## + black 33 ################################# + purple 30 ############################## + red 28 ############################ + orange 27 ########################### + yellow 26 ########################## + maroon 15 ############### + light blue 13 ############# + gray 10 ########## + brown 9 ######### + teal 7 ####### + pink 2 ## + gold 2 ## + +--- Number Colors (13 unique) --- + white 184 ################################################## + black 44 ############################################ + red 41 ######################################### + blue 39 ####################################### + 
yellow 32 ################################ + orange 29 ############################# + gold 21 ##################### + green 14 ############## + maroon 12 ############ + purple 11 ########### + dark blue 9 ######### + pink 6 ###### + silver 2 ## + +--- Combined Color Palette (17 unique values) --- + black jersey: 33 number: 44 + blue jersey: 69 number: 39 + brown jersey: 9 number: 0 + dark blue jersey: 0 number: 9 + gold jersey: 2 number: 21 + gray jersey: 10 number: 0 + green jersey: 53 number: 14 + light blue jersey: 13 number: 0 + maroon jersey: 15 number: 12 + orange jersey: 27 number: 29 + pink jersey: 2 number: 6 + purple jersey: 30 number: 11 + red jersey: 28 number: 41 + silver jersey: 0 number: 2 + teal jersey: 7 number: 0 + white jersey:120 number:184 + yellow jersey: 26 number: 32 + + + +#Gemini 3 Flash (Hex Colors, random sample of 20 images) Results: + +Test params: test_hex_color_specificity.py --sample 20 --seed 42 + +====================================================================== +HEX COLOR SPECIFICITY ANALYSIS +====================================================================== +Model: gemini-3-flash-preview +Images tested: 20 (seed=42) +Total jerseys: 56 +Total jersey color values: 56 +Errors: 0 + +Valid hex codes: 56/56 + +--- Specificity Breakdown --- + Generic (near a pure primary): 16 (28.6%) + Specific (distinct shade): 40 (71.4%) + +--- Unique Hex Values (24) --- + #004B23 RGB( 0, 75, 35) HSL(148.0,100.0%,14.7%) x7 [specific, near green (dark), d=63.5] + #1A2344 RGB( 26, 35, 68) HSL(227.1,44.7%,18.4%) x2 [specific, near navy, d=74.2] + #1E4BA1 RGB( 30, 75,161) HSL(219.4,68.6%,37.5%) x1 [specific, near navy, d=87.3] + #2B231D RGB( 43, 35, 29) HSL( 25.7,19.4%,14.1%) x1 [specific, near black, d=62.6] + #3D2B1F RGB( 61, 43, 31) HSL( 24.0,32.6%,18.0%) x1 [specific, near black, d=80.8] + #461D7C RGB( 70, 29,124) HSL(265.9,62.1%,30.0%) x1 [specific, near purple, d=65.0] + #4B2E83 RGB( 75, 46,131) HSL(260.5,48.0%,34.7%) x5 [specific, near 
purple, d=70.2] + #701112 RGB(112, 17, 18) HSL(359.4,73.6%,25.3%) x1 [specific, near maroon, d=29.5] + #7BAFD4 RGB(123,175,212) HSL(204.9,50.9%,65.7%) x3 [specific, near silver, d=73.8] + #990000 RGB(153, 0, 0) HSL( 0.0,100.0%,30.0%) x2 [specific, near maroon, d=25.0] + #A9A9A9 RGB(169,169,169) HSL( 0.0, 0.0%,66.3%) x1 [specific, near silver, d=39.8] + #C41230 RGB(196, 18, 48) HSL(349.9,83.2%,42.0%) x1 [specific, near brown, d=39.7] + #D11111 RGB(209, 17, 17) HSL( 0.0,85.0%,44.3%) x2 [specific, near red, d=51.9] + #D32F2F RGB(211, 47, 47) HSL( 0.0,65.1%,50.6%) x2 [specific, near brown, d=46.5] + #E31837 RGB(227, 24, 55) HSL(350.8,80.9%,49.2%) x1 [specific, near brown, d=65.9] + #E31B23 RGB(227, 27, 35) HSL(357.6,78.7%,49.8%) x1 [specific, near red, d=52.3] + #E3242B RGB(227, 36, 43) HSL(357.8,77.3%,51.6%) x2 [specific, near brown, d=62.3] + #E6E600 RGB(230,230, 0) HSL( 60.0,100.0%,45.1%) x1 [specific, near gold, d=29.2] + #E8E8E8 RGB(232,232,232) HSL( 0.0, 0.0%,91.0%) x1 [specific, near white, d=39.8] + #E91E63 RGB(233, 30, 99) HSL(339.6,82.2%,51.6%) x1 [specific, near brown, d=89.5] + #F06292 RGB(240, 98,146) HSL(339.7,82.6%,66.3%) x2 [specific, near pink, d=111.0] + #F57C00 RGB(245,124, 0) HSL( 30.4,100.0%,48.0%) x1 [specific, near orange, d=42.2] + #FFCD00 RGB(255,205, 0) HSL( 48.2,100.0%,50.0%) x1 [GENERIC, near gold, d=10.0] + #FFFFFF RGB(255,255,255) HSL( 0.0, 0.0%,100.0%) x15 [GENERIC, near white, d=0.0] + +--- Distance from Nearest Primary --- + Min: 0.0 Avg: 44.5 Max: 111.0 + (Higher = more specific. Threshold for 'generic' = 20) + +--- Verdict --- + MIXED results (71% specific). + The model sometimes returns specific shades but often falls back to primaries. 
+ + + +#Qwen3-VL-8B (Hex Colors, random sample of 20 images) Results: + +Test params: test_hex_color_specificity_llama.py --sample 20 --seed 42 + +====================================================================== +HEX COLOR SPECIFICITY ANALYSIS +====================================================================== +Model: unsloth_Qwen3-VL-8B-Instruct-GGUF_Qwen3-VL-8B-Instruct-BF16 +Server: http://agx:8080 +Images tested: 20 (seed=42) +Total jerseys: 59 +Total jersey color values: 59 +Errors: 0 + +Valid hex codes: 59/59 + +--- Specificity Breakdown --- + Generic (near a pure primary): 22 (37.3%) + Specific (distinct shade): 37 (62.7%) + +--- Unique Hex Values (21) --- + #000000 RGB( 0, 0, 0) HSL( 0.0, 0.0%, 0.0%) x1 [GENERIC, near black, d=0.0] + #006400 RGB( 0,100, 0) HSL(120.0,100.0%,19.6%) x10 [specific, near green (dark), d=28.0] + #191970 RGB( 25, 25,112) HSL(240.0,63.5%,26.9%) x1 [specific, near navy, d=38.8] + #19418A RGB( 25, 65,138) HSL(218.8,69.3%,32.0%) x1 [specific, near navy, d=70.4] + #3D2B21 RGB( 61, 43, 33) HSL( 21.4,29.8%,18.4%) x2 [specific, near black, d=81.6] + #66B2FF RGB(102,178,255) HSL(210.2,100.0%,70.0%) x3 [specific, near silver, d=110.7] + #6A0DAD RGB(106, 13,173) HSL(274.9,86.0%,36.5%) x6 [specific, near purple, d=51.7] + #8B0000 RGB(139, 0, 0) HSL( 0.0,100.0%,27.3%) x1 [GENERIC, near maroon, d=11.0] + #A9A9A9 RGB(169,169,169) HSL( 0.0, 0.0%,66.3%) x1 [specific, near silver, d=39.8] + #B22234 RGB(178, 34, 52) HSL(352.5,67.9%,41.6%) x2 [GENERIC, near brown, d=18.2] + #D32F2F RGB(211, 47, 47) HSL( 0.0,65.1%,50.6%) x3 [specific, near brown, d=46.5] + #D60000 RGB(214, 0, 0) HSL( 0.0,100.0%,42.0%) x3 [specific, near red, d=41.0] + #DC143C RGB(220, 20, 60) HSL(348.0,83.3%,47.1%) x2 [specific, near brown, d=61.9] + #F5F5DC RGB(245,245,220) HSL( 60.0,55.6%,91.2%) x2 [specific, near white, d=37.7] + #F5F5F5 RGB(245,245,245) HSL( 0.0, 0.0%,96.1%) x1 [GENERIC, near white, d=17.3] + #FF0000 RGB(255, 0, 0) HSL( 0.0,100.0%,50.0%) x1 [GENERIC, 
near red, d=0.0] + #FF6347 RGB(255, 99, 71) HSL( 9.1,100.0%,63.9%) x1 [specific, near orange, d=96.9] + #FF69B4 RGB(255,105,180) HSL(330.0,100.0%,70.6%) x2 [specific, near pink, d=90.0] + #FFD700 RGB(255,215, 0) HSL( 50.6,100.0%,50.0%) x1 [GENERIC, near gold, d=0.0] + #FFFF00 RGB(255,255, 0) HSL( 60.0,100.0%,50.0%) x1 [GENERIC, near yellow, d=0.0] + #FFFFFF RGB(255,255,255) HSL( 0.0, 0.0%,100.0%) x14 [GENERIC, near white, d=0.0] + +--- Distance from Nearest Primary --- + Min: 0.0 Avg: 34.5 Max: 110.7 + (Higher = more specific. Threshold for 'generic' = 20) + +--- Verdict --- + MIXED results (63% specific). + The model sometimes returns specific shades but often falls back to primaries. + + + diff --git a/jersey_prompt_hex_color.txt b/jersey_prompt_hex_color.txt new file mode 100644 index 0000000..4a8d733 --- /dev/null +++ b/jersey_prompt_hex_color.txt @@ -0,0 +1,50 @@ +You are an expert at detecting sports jerseys in images. Carefully examine the provided image and identify all visible sports jerseys. + +CRITICAL INSTRUCTIONS: +1. ONLY detect jerseys that are CLEARLY VISIBLE in the image +2. ONLY include jersey numbers that you can ACTUALLY READ in the image +3. If you CANNOT see any jerseys, you MUST return {"jerseys": []} +4. DO NOT make up, imagine, or guess jersey numbers that aren't visible +5. DO NOT include jerseys if you cannot clearly see the number + +COLOR INSTRUCTIONS: +- Report jersey_color and number_color as HEX color codes (e.g. "#8B0000", "#1E3A5F") +- Do NOT use generic color names like "red", "blue", "white" +- Estimate the SPECIFIC shade you see in the image as precisely as possible +- For example: dark maroon should be "#800000", not "#FF0000" +- Royal blue should be "#4169E1", not "#0000FF" + +RESPONSE FORMAT: +Respond ONLY with a valid JSON object. No explanations, no markdown, no extra text. + +Use DOUBLE QUOTES (") for all JSON keys and string values. + +The JSON must have a single key "jerseys" with an array of dictionaries. 
+ +Each dictionary must have exactly these three keys: +- "jersey_number": The number on the jersey (as a string, only if clearly visible) +- "jersey_color": The primary color of the jersey as a HEX code (e.g. "#8B0000") +- "number_color": The color of the number on the jersey as a HEX code (e.g. "#FFFFFF") + +Example response for an image WITH visible jerseys: +{ + "jerseys": [ + { + "jersey_number": "101", + "jersey_color": "#8B0000", + "number_color": "#F5F5DC" + }, + { + "jersey_number": "142", + "jersey_color": "#1E3A5F", + "number_color": "#DAA520" + } + ] +} + +Example response for an image WITHOUT jerseys or with unclear numbers: +{"jerseys": []} + +REMEMBER: Only include jerseys with numbers you can ACTUALLY SEE in the image. When in doubt, return empty array. + +Now analyze the image and return the JSON object. \ No newline at end of file diff --git a/test_color_variety.py b/test_color_variety.py new file mode 100644 index 0000000..c8bb828 --- /dev/null +++ b/test_color_variety.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python3 +""" +Test script to discover the variety of colors a VLM returns for jersey detection. + +Submits all test images to the VLM and tallies every unique jersey_color and +number_color value, producing a summary of the model's color vocabulary. 
def clean_response(text: str) -> str:
    """Strip reasoning tags and Markdown code fences from raw model output.

    The cleaning runs in two stages:

    1. Reasoning blocks are removed, both the ASCII ``<think>...</think>``
       form and the ``◁think▷...◁/think▷`` form, followed by any orphaned
       opening or closing tag left behind.
    2. If a fenced code block (optionally tagged ``json``) is present, only
       its contents are kept; otherwise any stray fence markers are dropped.

    Args:
        text: Raw text returned by the model.

    Returns:
        The remaining text with leading and trailing whitespace stripped.
    """
    # Complete reasoning blocks first, so their contents never reach the
    # JSON parser. DOTALL lets the block span multiple lines.
    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
    cleaned = re.sub(r'\u25c1think\u25b7.*?\u25c1/think\u25b7', '', cleaned, flags=re.DOTALL)

    # Then any unmatched opening or closing tag.
    cleaned = re.sub(r'</?think>', '', cleaned, flags=re.IGNORECASE)
    cleaned = re.sub(r'\u25c1/?think\u25b7', '', cleaned, flags=re.IGNORECASE)

    json_block = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', cleaned, flags=re.DOTALL | re.IGNORECASE)
    if json_block:
        # Keep only the payload of the first fenced block.
        cleaned = json_block.group(1)
    else:
        # No complete fenced block: remove dangling fence markers only.
        cleaned = re.sub(r'```(?:json)?', '', cleaned, flags=re.IGNORECASE)

    return cleaned.strip()
Counter() + total_jerseys = 0 + errors = 0 + start_all = time.time() + + for i, image_path in enumerate(image_files, 1): + print(f"[{i}/{len(image_files)}] {image_path.name} ... ", end="", flush=True) + + image = cv2.imread(str(image_path)) + if image is None: + print("SKIP (failed to load)") + errors += 1 + continue + + message = client.create_multimodal_message(role="user", content=prompt, images=[image]) + + try: + t0 = time.time() + response = client.chat_completion(messages=[message], temperature=0.1, max_tokens=1000) + elapsed = time.time() - t0 + + response_text = response['choices'][0]['message']['content'] + cleaned = clean_response(response_text) + result = json.loads(cleaned) + jerseys = result.get('jerseys', []) + + colors_found = [] + for j in jerseys: + jc = j.get('jersey_color', '').strip().lower() + nc = j.get('number_color', '').strip().lower() + if jc: + jersey_color_counter[jc] += 1 + if nc: + number_color_counter[nc] += 1 + colors_found.append(f"{jc}/{nc}") + total_jerseys += 1 + + print(f"{len(jerseys)} jersey(s) in {elapsed:.1f}s {', '.join(colors_found) if colors_found else '(none)'}") + + except (json.JSONDecodeError, KeyError, IndexError) as e: + print(f"PARSE ERROR: {e}") + errors += 1 + except Exception as e: + print(f"ERROR: {e}") + errors += 1 + + total_time = time.time() - start_all + + # --- Summary --- + print() + print("=" * 70) + print("COLOR VARIETY SUMMARY") + print("=" * 70) + print(f"Images processed: {len(image_files)}") + print(f"Total jerseys detected: {total_jerseys}") + print(f"Errors: {errors}") + print(f"Total time: {total_time:.1f}s ({total_time / len(image_files):.1f}s avg)") + + print(f"\n--- Jersey Colors ({len(jersey_color_counter)} unique) ---") + for color, count in jersey_color_counter.most_common(): + bar = "#" * min(count, 50) + print(f" {color:25s} {count:4d} {bar}") + + print(f"\n--- Number Colors ({len(number_color_counter)} unique) ---") + for color, count in number_color_counter.most_common(): + bar = "#" 
def load_api_key() -> str:
    """Return the contents of ``API_KEY_FILE`` with surrounding whitespace removed."""
    raw_key = Path(API_KEY_FILE).read_text()
    return raw_key.strip()
flags=re.DOTALL | re.IGNORECASE) + if json_block: + cleaned = json_block.group(1) + else: + cleaned = re.sub(r'```(?:json)?', '', cleaned, flags=re.IGNORECASE) + + return cleaned.strip() + + +def salvage_jerseys(text: str) -> list[dict]: + """Extract complete jersey objects from truncated JSON using regex. + + When the response is cut off mid-JSON, json.loads() fails. We can still + recover every fully-formed jersey object that was returned before the + truncation point. + """ + pattern = re.compile( + r'\{\s*' + r'"jersey_number"\s*:\s*"[^"]*"\s*,\s*' + r'"jersey_color"\s*:\s*"([^"]*)"\s*,\s*' + r'"number_color"\s*:\s*"([^"]*)"\s*' + r'\}', + re.DOTALL, + ) + jerseys = [] + for m in pattern.finditer(text): + jerseys.append({ + 'jersey_color': m.group(1), + 'number_color': m.group(2), + }) + return jerseys + + +def encode_image(image_path: str) -> tuple[str, str]: + """Read an image file and return (base64_data, mime_type).""" + ext = Path(image_path).suffix.lower() + mime_map = { + '.jpg': 'image/jpeg', + '.jpeg': 'image/jpeg', + '.png': 'image/png', + '.webp': 'image/webp', + '.bmp': 'image/bmp', + '.tiff': 'image/tiff', + } + mime_type = mime_map.get(ext, 'image/jpeg') + + with open(image_path, 'rb') as f: + data = base64.b64encode(f.read()).decode('utf-8') + + return data, mime_type + + +MAX_RETRIES = 3 +RETRY_BACKOFF = [2, 5, 10] # seconds between retries + + +def call_gemini(api_key: str, image_path: str, prompt: str) -> dict: + """Send an image + prompt to the Gemini API and return parsed JSON.""" + image_data, mime_type = encode_image(image_path) + + payload = { + "contents": [{ + "parts": [ + { + "inline_data": { + "mime_type": mime_type, + "data": image_data, + } + }, + { + "text": prompt, + } + ] + }], + "generationConfig": { + "temperature": 0.1, + "maxOutputTokens": 8192, + "responseMimeType": "application/json", + } + } + + for attempt in range(MAX_RETRIES): + response = requests.post( + API_URL, + headers={ + "x-goog-api-key": api_key, + 
def main():
    """Tally every jersey/number color Gemini reports across the test images.

    Reads the API key and prompt from the module-level paths, sends each
    supported image in ``IMAGES_DIR`` through ``call_gemini``, counts the
    lower-cased ``jersey_color`` / ``number_color`` values and prints one
    line per image followed by a summary of the color vocabulary.
    """
    api_key = load_api_key()

    with open(PROMPT_FILE, 'r') as f:
        prompt = f.read()

    # Gather image files (extensions the API can handle). Directories are
    # filtered out so that a folder whose name ends in an image suffix is
    # never submitted as an image.
    valid_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
    files = [p for p in Path(IMAGES_DIR).iterdir() if p.is_file()]
    image_files = sorted(p for p in files if p.suffix.lower() in valid_extensions)
    skipped = sorted(p.name for p in files if p.suffix.lower() not in valid_extensions)

    print(f"Model: {GEMINI_MODEL}")
    print(f"Images to process: {len(image_files)}")
    if skipped:
        print(f"Skipping {len(skipped)} unsupported files: {', '.join(skipped)}")
    print(f"Prompt: {PROMPT_FILE} ({len(prompt)} chars)")
    print("=" * 70)

    jersey_color_counter = Counter()
    number_color_counter = Counter()
    total_jerseys = 0
    errors = 0
    start_all = time.time()

    def tally(jerseys):
        """Count the colors of *jerseys* and return their "jersey/number" labels."""
        # Normalize every entry before touching the counters, so a malformed
        # entry cannot leave an image half-counted. `or ''` maps JSON null to
        # an empty (uncounted) color instead of crashing on .strip().
        pairs = [
            (
                str(j.get('jersey_color') or '').strip().lower(),
                str(j.get('number_color') or '').strip().lower(),
            )
            for j in jerseys
        ]
        for jc, nc in pairs:
            if jc:
                jersey_color_counter[jc] += 1
            if nc:
                number_color_counter[nc] += 1
        return [f"{jc}/{nc}" for jc, nc in pairs]

    for i, image_path in enumerate(image_files, 1):
        print(f"[{i}/{len(image_files)}] {image_path.name} ... ", end="", flush=True)

        # Reset per image: the parse-error handler below must only ever see
        # text that belongs to the current image, never a previous response.
        raw = ""
        t0 = time.time()
        try:
            resp = call_gemini(api_key, str(image_path), prompt)
            elapsed = time.time() - t0

            # Extract text from Gemini response
            raw = resp['candidates'][0]['content']['parts'][0]['text']
            result = json.loads(clean_response(raw))
            jerseys = result.get('jerseys', [])

            colors_found = tally(jerseys)
            total_jerseys += len(colors_found)

            print(f"{len(jerseys)} jersey(s) in {elapsed:.1f}s {', '.join(colors_found) if colors_found else '(none)'}")

        except (json.JSONDecodeError, KeyError, IndexError) as e:
            elapsed = time.time() - t0
            # Try to salvage complete jersey objects from truncated JSON
            salvaged = salvage_jerseys(raw) if raw else []
            if salvaged:
                colors_found = tally(salvaged)
                total_jerseys += len(colors_found)
                print(f"TRUNCATED, salvaged {len(salvaged)} jersey(s) in {elapsed:.1f}s {', '.join(colors_found)}")
            else:
                print(f"PARSE ERROR: {e}")
                if raw:
                    print(f" raw: {raw[:200]}")
                errors += 1
        except requests.exceptions.HTTPError as e:
            print(f"HTTP ERROR: {e}")
            errors += 1
        except Exception as e:
            print(f"ERROR: {e}")
            errors += 1

    total_time = time.time() - start_all
    # Avoid ZeroDivisionError when the image directory has no supported files.
    avg_time = total_time / len(image_files) if image_files else 0.0

    # --- Summary ---
    print()
    print("=" * 70)
    print(f"COLOR VARIETY SUMMARY ({GEMINI_MODEL})")
    print("=" * 70)
    print(f"Images processed: {len(image_files)}")
    print(f"Total jerseys detected: {total_jerseys}")
    print(f"Errors: {errors}")
    print(f"Total time: {total_time:.1f}s ({avg_time:.1f}s avg)")

    print(f"\n--- Jersey Colors ({len(jersey_color_counter)} unique) ---")
    for color, count in jersey_color_counter.most_common():
        bar = "#" * min(count, 50)
        print(f" {color:25s} {count:4d} {bar}")

    print(f"\n--- Number Colors ({len(number_color_counter)} unique) ---")
    for color, count in number_color_counter.most_common():
        bar = "#" * min(count, 50)
        print(f" {color:25s} {count:4d} {bar}")

    # Combined unique palette
    all_colors = sorted(set(jersey_color_counter) | set(number_color_counter))
    print(f"\n--- Combined Color Palette ({len(all_colors)} unique values) ---")
    for color in all_colors:
        jc = jersey_color_counter.get(color, 0)
        nc = number_color_counter.get(color, 0)
        print(f" {color:25s} jersey:{jc:3d} number:{nc:3d}")
+ +Usage: + python test_hex_color_specificity.py + python test_hex_color_specificity.py --sample 20 + python test_hex_color_specificity.py --seed 42 +""" + +import argparse +import base64 +import colorsys +import json +import math +import os +import random +import re +import time +from pathlib import Path + +import requests + +GEMINI_MODEL = "gemini-3-flash-preview" +API_URL = f"https://generativelanguage.googleapis.com/v1beta/models/{GEMINI_MODEL}:generateContent" + +IMAGES_DIR = os.path.join(os.path.dirname(__file__), "basketball_jersery_color_test_files") +PROMPT_FILE = os.path.join(os.path.dirname(__file__), "jersey_prompt_hex_color.txt") +API_KEY_FILE = os.path.join(os.path.dirname(__file__), "gemini_api_key.txt") + +# Pure primary / basic colors that indicate the model is NOT being specific +PRIMARY_COLORS = { + "#FF0000": "red", + "#00FF00": "green", + "#0000FF": "blue", + "#FFFF00": "yellow", + "#FF00FF": "magenta", + "#00FFFF": "cyan", + "#FFFFFF": "white", + "#000000": "black", + "#808080": "gray", + "#FFA500": "orange", + "#800080": "purple", + "#FFC0CB": "pink", + "#A52A2A": "brown", + "#800000": "maroon", + "#008000": "green (dark)", + "#000080": "navy", + "#C0C0C0": "silver", + "#FFD700": "gold", +} + +# Distance threshold: how close to a primary a hex must be to count as "generic" +# In RGB space (0-255 per channel), 20 is very close +GENERIC_DISTANCE_THRESHOLD = 20 + + +def hex_to_rgb(h: str) -> tuple[int, int, int] | None: + """Parse a hex color string to (r, g, b). 
Returns None if invalid.""" + h = h.strip().lstrip('#') + if len(h) == 6: + try: + return (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)) + except ValueError: + return None + if len(h) == 3: + try: + return (int(h[0]*2, 16), int(h[1]*2, 16), int(h[2]*2, 16)) + except ValueError: + return None + return None + + +def rgb_distance(a: tuple[int, int, int], b: tuple[int, int, int]) -> float: + """Euclidean distance between two RGB colors.""" + return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b))) + + +def rgb_to_hsl(r: int, g: int, b: int) -> tuple[float, float, float]: + """Convert RGB (0-255) to HSL (h: 0-360, s: 0-100, l: 0-100).""" + h, l, s = colorsys.rgb_to_hls(r / 255, g / 255, b / 255) + return round(h * 360, 1), round(s * 100, 1), round(l * 100, 1) + + +def classify_color(hex_str: str) -> dict: + """Classify a hex color as generic/primary or specific. + + Returns a dict with: + valid: bool - whether the hex code is a valid color + hex: str - normalized hex code + rgb: tuple - (r, g, b) + hsl: tuple - (h, s, l) + is_generic: bool - True if the color is a pure primary / basic color + nearest_primary: str - the closest primary color name + primary_distance: float - RGB distance to the nearest primary + """ + rgb = hex_to_rgb(hex_str) + if rgb is None: + return {'valid': False, 'hex': hex_str, 'reason': 'invalid hex'} + + normalized = f"#{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" + hsl = rgb_to_hsl(*rgb) + + # Find nearest primary + best_name = "unknown" + best_dist = float('inf') + for phex, pname in PRIMARY_COLORS.items(): + prgb = hex_to_rgb(phex) + d = rgb_distance(rgb, prgb) + if d < best_dist: + best_dist = d + best_name = pname + + is_generic = best_dist < GENERIC_DISTANCE_THRESHOLD + + return { + 'valid': True, + 'hex': normalized, + 'rgb': rgb, + 'hsl': hsl, + 'is_generic': is_generic, + 'nearest_primary': best_name, + 'primary_distance': round(best_dist, 1), + } + + +def load_api_key() -> str: + with open(API_KEY_FILE, 'r') as f: + return 
def clean_response(text: str) -> str:
    """Remove think tags and markdown code fences from model output.

    Args:
        text: Raw text returned by the model.

    Returns:
        The contents of the first fenced code block when one is present,
        otherwise the text with any dangling fence marker removed. In both
        cases ``<think>`` reasoning is removed and whitespace is trimmed.
    """
    # Remove complete <think>...</think> blocks, then any unmatched tag, before
    # looking for a code fence so that reasoning text is never taken as payload.
    cleaned = re.sub(r'<think>.*?</think>', '', text, flags=re.DOTALL | re.IGNORECASE)
    cleaned = re.sub(r'</?think>', '', cleaned, flags=re.IGNORECASE)
    json_block = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', cleaned, flags=re.DOTALL | re.IGNORECASE)
    if json_block:
        return json_block.group(1).strip()
    # No closed fence: drop a dangling opening fence and return the rest.
    return re.sub(r'```(?:json)?', '', cleaned, flags=re.IGNORECASE).strip()


def salvage_jerseys(text: str) -> list[dict]:
    """Extract complete jersey objects from truncated JSON.

    Only objects whose keys appear in the order jersey_number, jersey_color,
    number_color with string values are matched.

    Returns:
        One dict per recovered object with the keys ``jersey_color`` and
        ``number_color``; an empty list when nothing matches.
    """
    pattern = re.compile(
        r'\{\s*'
        r'"jersey_number"\s*:\s*"[^"]*"\s*,\s*'
        r'"jersey_color"\s*:\s*"([^"]*)"\s*,\s*'
        r'"number_color"\s*:\s*"([^"]*)"\s*'
        r'\}',
        re.DOTALL,
    )
    return [
        {'jersey_color': m.group(1), 'number_color': m.group(2)}
        for m in pattern.finditer(text)
    ]


def encode_image(image_path: str) -> tuple[str, str]:
    """Read an image file and return (base64_data, mime_type).

    The MIME type is chosen from the file extension (case-insensitive);
    unknown extensions fall back to ``image/jpeg``.
    """
    mime_map = {
        '.jpg': 'image/jpeg',
        '.jpeg': 'image/jpeg',
        '.png': 'image/png',
        '.webp': 'image/webp',
        '.bmp': 'image/bmp',
        '.tiff': 'image/tiff',
    }
    path = Path(image_path)
    mime_type = mime_map.get(path.suffix.lower(), 'image/jpeg')
    data = base64.b64encode(path.read_bytes()).decode('utf-8')
    return data, mime_type
def main():
    """Sample images, ask Gemini for hex jersey colors and report specificity.

    Command-line options:
        --sample N  number of random images to test (default: 10)
        --seed S    random seed, so a run can be reproduced

    Each returned ``jersey_color`` is classified with ``classify_color``; the
    report lists invalid values, the generic/specific split, every unique hex
    value, distance statistics and a verdict.
    """
    parser = argparse.ArgumentParser(description="Test hex color specificity from Gemini VLM")
    parser.add_argument('--sample', type=int, default=10, help='Number of random images to test (default: 10)')
    parser.add_argument('--seed', type=int, default=None, help='Random seed for reproducibility')
    args = parser.parse_args()

    api_key = load_api_key()

    with open(PROMPT_FILE, 'r') as f:
        prompt = f.read()

    # Gather and sample image files (regular files only)
    valid_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
    all_images = sorted(
        p for p in Path(IMAGES_DIR).iterdir()
        if p.is_file() and p.suffix.lower() in valid_extensions
    )

    seed = args.seed if args.seed is not None else random.randint(0, 99999)
    rng = random.Random(seed)
    # Clamp to [0, len(all_images)]: random.sample raises ValueError for a
    # negative or oversized sample.
    sample_size = max(0, min(args.sample, len(all_images)))
    sample_images = sorted(rng.sample(all_images, sample_size))

    print(f"Model: {GEMINI_MODEL}")
    print(f"Prompt: {PROMPT_FILE}")
    print(f"Sample: {sample_size} images (seed={seed})")
    print(f"Selected: {', '.join(p.name for p in sample_images)}")
    print("=" * 70)

    # Collect all color classifications
    all_colors = []  # list of dicts with image, field, hex, classification
    total_jerseys = 0
    errors = 0

    for i, image_path in enumerate(sample_images, 1):
        print(f"\n[{i}/{sample_size}] {image_path.name}")

        try:
            t0 = time.time()
            resp = call_gemini(api_key, str(image_path), prompt)
            elapsed = time.time() - t0

            text = resp['candidates'][0]['content']['parts'][0]['text']
            cleaned = clean_response(text)

            try:
                result = json.loads(cleaned)
                jerseys = result.get('jerseys', [])
            except json.JSONDecodeError:
                jerseys = salvage_jerseys(cleaned)
                if jerseys:
                    print(f" (truncated response, salvaged {len(jerseys)} jersey(s))")

            print(f" {len(jerseys)} jersey(s) detected in {elapsed:.1f}s")

            for j in jerseys:
                total_jerseys += 1
                raw_hex = j.get('jersey_color', '')
                # A JSON null or other non-string value is recorded as an
                # invalid color rather than raising and discarding the
                # remaining jerseys of this image.
                if isinstance(raw_hex, str):
                    c = classify_color(raw_hex)
                else:
                    c = {'valid': False, 'hex': raw_hex, 'reason': 'invalid hex'}
                c['image'] = image_path.name
                c['field'] = 'jersey_color'
                c['raw'] = raw_hex
                all_colors.append(c)

                if not c['valid']:
                    status = f" INVALID ({raw_hex})"
                elif c['is_generic']:
                    status = f" GENERIC ~{c['nearest_primary']}"
                else:
                    status = f" SPECIFIC (nearest: {c['nearest_primary']}, dist={c['primary_distance']})"

                # str(): the ':10s' format spec rejects None and numbers.
                print(f" jersey: {str(raw_hex):10s} -> {status}")

        except requests.exceptions.HTTPError as e:
            print(f" HTTP ERROR: {e}")
            errors += 1
        except Exception as e:
            print(f" ERROR: {e}")
            errors += 1

    # --- Analysis ---
    print()
    print("=" * 70)
    print("HEX COLOR SPECIFICITY ANALYSIS")
    print("=" * 70)
    print(f"Model: {GEMINI_MODEL}")
    print(f"Images tested: {sample_size} (seed={seed})")
    print(f"Total jerseys: {total_jerseys}")
    print(f"Total jersey color values: {len(all_colors)}")
    print(f"Errors: {errors}")

    valid_colors = [c for c in all_colors if c['valid']]
    invalid_colors = [c for c in all_colors if not c['valid']]

    print(f"\nValid hex codes: {len(valid_colors)}/{len(all_colors)}")
    if invalid_colors:
        print(f"Invalid values ({len(invalid_colors)}):")
        for c in invalid_colors:
            print(f" {c['image']} {c['field']}: {c['raw']}")

    if not valid_colors:
        print("\nNo valid hex colors returned. The model may not support hex output.")
        return

    generic = [c for c in valid_colors if c['is_generic']]
    specific = [c for c in valid_colors if not c['is_generic']]

    pct_specific = len(specific) / len(valid_colors) * 100

    print(f"\n--- Specificity Breakdown ---")
    print(f" Generic (near a pure primary): {len(generic):3d} ({100 - pct_specific:.1f}%)")
    print(f" Specific (distinct shade): {len(specific):3d} ({pct_specific:.1f}%)")

    # Show unique hex values returned. One pass keeps the first classification
    # and an occurrence count per normalized hex value, instead of rescanning
    # valid_colors and re-classifying for every unique value.
    by_hex = {}
    for c in valid_colors:
        entry = by_hex.setdefault(c['hex'], {'cl': c, 'count': 0})
        entry['count'] += 1

    print(f"\n--- Unique Hex Values ({len(by_hex)}) ---")
    for h in sorted(by_hex):
        cl = by_hex[h]['cl']
        count = by_hex[h]['count']
        rgb = cl['rgb']
        hsl = cl['hsl']
        tag = "GENERIC" if cl['is_generic'] else "specific"
        print(f" {h} RGB({rgb[0]:3d},{rgb[1]:3d},{rgb[2]:3d}) "
              f"HSL({hsl[0]:5.1f},{hsl[1]:4.1f}%,{hsl[2]:4.1f}%) "
              f"x{count} [{tag}, near {cl['nearest_primary']}, d={cl['primary_distance']}]")

    # Distance statistics
    distances = [c['primary_distance'] for c in valid_colors]
    avg_dist = sum(distances) / len(distances)
    min_dist = min(distances)
    max_dist = max(distances)
    print(f"\n--- Distance from Nearest Primary ---")
    print(f" Min: {min_dist:.1f} Avg: {avg_dist:.1f} Max: {max_dist:.1f}")
    print(f" (Higher = more specific. Threshold for 'generic' = {GENERIC_DISTANCE_THRESHOLD})")

    # Verdict
    print(f"\n--- Verdict ---")
    if pct_specific >= 80:
        print(f" The model returns SPECIFIC hex colors ({pct_specific:.0f}% distinct shades).")
        print(f" It is capable of estimating precise colors from images.")
    elif pct_specific >= 40:
        print(f" MIXED results ({pct_specific:.0f}% specific).")
        print(f" The model sometimes returns specific shades but often falls back to primaries.")
    else:
        print(f" The model mostly returns GENERIC primary colors ({pct_specific:.0f}% specific).")
        print(f" Hex codes are largely just the standard primary color equivalents.")
+ +Usage: + python test_hex_color_specificity_llama.py + python test_hex_color_specificity_llama.py --sample 20 + python test_hex_color_specificity_llama.py --seed 42 + python test_hex_color_specificity_llama.py --server-url http://hitagi:8080 +""" + +import argparse +import colorsys +import json +import math +import os +import random +import re +import sys +import time +from pathlib import Path + +import cv2 + +sys.path.insert(0, os.path.dirname(os.path.abspath(__file__))) +from scan_utils.llama_cpp_client import LlamaCppClient + +SERVER_URL = "http://agx:8080" +IMAGES_DIR = os.path.join(os.path.dirname(__file__), "basketball_jersery_color_test_files") +PROMPT_FILE = os.path.join(os.path.dirname(__file__), "jersey_prompt_hex_color.txt") + +# Pure primary / basic colors that indicate the model is NOT being specific +PRIMARY_COLORS = { + "#FF0000": "red", + "#00FF00": "green", + "#0000FF": "blue", + "#FFFF00": "yellow", + "#FF00FF": "magenta", + "#00FFFF": "cyan", + "#FFFFFF": "white", + "#000000": "black", + "#808080": "gray", + "#FFA500": "orange", + "#800080": "purple", + "#FFC0CB": "pink", + "#A52A2A": "brown", + "#800000": "maroon", + "#008000": "green (dark)", + "#000080": "navy", + "#C0C0C0": "silver", + "#FFD700": "gold", +} + +# Distance threshold: how close to a primary a hex must be to count as "generic" +# In RGB space (0-255 per channel), 20 is very close +GENERIC_DISTANCE_THRESHOLD = 20 + + +def hex_to_rgb(h: str) -> tuple[int, int, int] | None: + """Parse a hex color string to (r, g, b). 
Returns None if invalid.""" + h = h.strip().lstrip('#') + if len(h) == 6: + try: + return (int(h[0:2], 16), int(h[2:4], 16), int(h[4:6], 16)) + except ValueError: + return None + if len(h) == 3: + try: + return (int(h[0]*2, 16), int(h[1]*2, 16), int(h[2]*2, 16)) + except ValueError: + return None + return None + + +def rgb_distance(a: tuple[int, int, int], b: tuple[int, int, int]) -> float: + """Euclidean distance between two RGB colors.""" + return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b))) + + +def rgb_to_hsl(r: int, g: int, b: int) -> tuple[float, float, float]: + """Convert RGB (0-255) to HSL (h: 0-360, s: 0-100, l: 0-100).""" + h, l, s = colorsys.rgb_to_hls(r / 255, g / 255, b / 255) + return round(h * 360, 1), round(s * 100, 1), round(l * 100, 1) + + +def classify_color(hex_str: str) -> dict: + """Classify a hex color as generic/primary or specific.""" + rgb = hex_to_rgb(hex_str) + if rgb is None: + return {'valid': False, 'hex': hex_str, 'reason': 'invalid hex'} + + normalized = f"#{rgb[0]:02X}{rgb[1]:02X}{rgb[2]:02X}" + hsl = rgb_to_hsl(*rgb) + + best_name = "unknown" + best_dist = float('inf') + for phex, pname in PRIMARY_COLORS.items(): + prgb = hex_to_rgb(phex) + d = rgb_distance(rgb, prgb) + if d < best_dist: + best_dist = d + best_name = pname + + is_generic = best_dist < GENERIC_DISTANCE_THRESHOLD + + return { + 'valid': True, + 'hex': normalized, + 'rgb': rgb, + 'hsl': hsl, + 'is_generic': is_generic, + 'nearest_primary': best_name, + 'primary_distance': round(best_dist, 1), + } + + +def clean_response(text: str) -> str: + cleaned = re.sub(r'.*?', '', text, flags=re.DOTALL | re.IGNORECASE) + cleaned = re.sub(r'\u25c1think\u25b7.*?\u25c1/think\u25b7', '', cleaned, flags=re.DOTALL) + cleaned = re.sub(r'', '', cleaned, flags=re.IGNORECASE) + cleaned = re.sub(r'\u25c1/?think\u25b7', '', cleaned, flags=re.IGNORECASE) + json_block = re.search(r'```(?:json)?\s*\n?(.*?)\n?```', cleaned, flags=re.DOTALL | re.IGNORECASE) + if json_block: + cleaned = 
def main():
    """Sample images, ask a llama.cpp VLM for hex jersey colors and report specificity.

    Command-line options:
        --sample N        number of random images to test (default: 10)
        --seed S          random seed, so a run can be reproduced
        --server-url URL  llama.cpp server to query (default: SERVER_URL)

    Each returned ``jersey_color`` is classified with ``classify_color``; the
    report lists invalid values, the generic/specific split, every unique hex
    value, distance statistics and a verdict.
    """
    parser = argparse.ArgumentParser(description="Test hex color specificity from local llama.cpp VLM")
    parser.add_argument('--sample', type=int, default=10, help='Number of random images to test (default: 10)')
    parser.add_argument('--seed', type=int, default=None, help='Random seed for reproducibility')
    parser.add_argument('--server-url', default=SERVER_URL, help=f'llama.cpp server URL (default: {SERVER_URL})')
    args = parser.parse_args()

    with open(PROMPT_FILE, 'r') as f:
        prompt = f.read()

    client = LlamaCppClient(base_url=args.server_url)

    # Detect model name from server
    model_name = "unknown"
    try:
        models = client.get_models()
        if 'data' in models and len(models['data']) > 0:
            model_id = models['data'][0].get('id', 'unknown')
            model_name = os.path.splitext(os.path.basename(model_id))[0] if model_id else "unknown"
    except Exception:
        # Best effort only: the name is used purely to label the output.
        pass

    # Gather and sample image files (regular files only)
    valid_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.tiff', '.webp'}
    all_images = sorted(
        p for p in Path(IMAGES_DIR).iterdir()
        if p.is_file() and p.suffix.lower() in valid_extensions
    )

    seed = args.seed if args.seed is not None else random.randint(0, 99999)
    rng = random.Random(seed)
    # Clamp to [0, len(all_images)]: random.sample raises ValueError for a
    # negative or oversized sample.
    sample_size = max(0, min(args.sample, len(all_images)))
    sample_images = sorted(rng.sample(all_images, sample_size))

    print(f"Model: {model_name}")
    print(f"Server: {args.server_url}")
    print(f"Prompt: {PROMPT_FILE}")
    print(f"Sample: {sample_size} images (seed={seed})")
    print(f"Selected: {', '.join(p.name for p in sample_images)}")
    print("=" * 70)

    # Collect all color classifications
    all_colors = []
    total_jerseys = 0
    errors = 0

    for i, image_path in enumerate(sample_images, 1):
        print(f"\n[{i}/{sample_size}] {image_path.name}")

        image = cv2.imread(str(image_path))
        if image is None:
            print(f" SKIP (failed to load)")
            errors += 1
            continue

        try:
            t0 = time.time()

            message = client.create_multimodal_message(role="user", content=prompt, images=[image])
            response = client.chat_completion(messages=[message], temperature=0.1, max_tokens=1000)
            elapsed = time.time() - t0

            response_text = response['choices'][0]['message']['content']
            cleaned = clean_response(response_text)

            try:
                result = json.loads(cleaned)
                jerseys = result.get('jerseys', [])
            except json.JSONDecodeError:
                jerseys = salvage_jerseys(cleaned)
                if jerseys:
                    print(f" (truncated response, salvaged {len(jerseys)} jersey(s))")

            print(f" {len(jerseys)} jersey(s) detected in {elapsed:.1f}s")

            for j in jerseys:
                total_jerseys += 1
                raw_hex = j.get('jersey_color', '')
                # A JSON null or other non-string value is recorded as an
                # invalid color rather than raising and discarding the
                # remaining jerseys of this image.
                if isinstance(raw_hex, str):
                    c = classify_color(raw_hex)
                else:
                    c = {'valid': False, 'hex': raw_hex, 'reason': 'invalid hex'}
                c['image'] = image_path.name
                c['field'] = 'jersey_color'
                c['raw'] = raw_hex
                all_colors.append(c)

                if not c['valid']:
                    status = f" INVALID ({raw_hex})"
                elif c['is_generic']:
                    status = f" GENERIC ~{c['nearest_primary']}"
                else:
                    status = f" SPECIFIC (nearest: {c['nearest_primary']}, dist={c['primary_distance']})"

                # str(): the ':10s' format spec rejects None and numbers.
                print(f" jersey: {str(raw_hex):10s} -> {status}")

        except Exception as e:
            print(f" ERROR: {e}")
            errors += 1

    # --- Analysis ---
    print()
    print("=" * 70)
    print("HEX COLOR SPECIFICITY ANALYSIS")
    print("=" * 70)
    print(f"Model: {model_name}")
    print(f"Server: {args.server_url}")
    print(f"Images tested: {sample_size} (seed={seed})")
    print(f"Total jerseys: {total_jerseys}")
    print(f"Total jersey color values: {len(all_colors)}")
    print(f"Errors: {errors}")

    valid_colors = [c for c in all_colors if c['valid']]
    invalid_colors = [c for c in all_colors if not c['valid']]

    print(f"\nValid hex codes: {len(valid_colors)}/{len(all_colors)}")
    if invalid_colors:
        print(f"Invalid values ({len(invalid_colors)}):")
        for c in invalid_colors:
            print(f" {c['image']} {c['field']}: {c['raw']}")

    if not valid_colors:
        print("\nNo valid hex colors returned. The model may not support hex output.")
        return

    generic = [c for c in valid_colors if c['is_generic']]
    specific = [c for c in valid_colors if not c['is_generic']]

    pct_specific = len(specific) / len(valid_colors) * 100

    print(f"\n--- Specificity Breakdown ---")
    print(f" Generic (near a pure primary): {len(generic):3d} ({100 - pct_specific:.1f}%)")
    print(f" Specific (distinct shade): {len(specific):3d} ({pct_specific:.1f}%)")

    # Show unique hex values returned. One pass keeps the first classification
    # and an occurrence count per normalized hex value, instead of rescanning
    # valid_colors and re-classifying for every unique value.
    by_hex = {}
    for c in valid_colors:
        entry = by_hex.setdefault(c['hex'], {'cl': c, 'count': 0})
        entry['count'] += 1

    print(f"\n--- Unique Hex Values ({len(by_hex)}) ---")
    for h in sorted(by_hex):
        cl = by_hex[h]['cl']
        count = by_hex[h]['count']
        rgb = cl['rgb']
        hsl = cl['hsl']
        tag = "GENERIC" if cl['is_generic'] else "specific"
        print(f" {h} RGB({rgb[0]:3d},{rgb[1]:3d},{rgb[2]:3d}) "
              f"HSL({hsl[0]:5.1f},{hsl[1]:4.1f}%,{hsl[2]:4.1f}%) "
              f"x{count} [{tag}, near {cl['nearest_primary']}, d={cl['primary_distance']}]")

    # Distance statistics
    distances = [c['primary_distance'] for c in valid_colors]
    avg_dist = sum(distances) / len(distances)
    min_dist = min(distances)
    max_dist = max(distances)
    print(f"\n--- Distance from Nearest Primary ---")
    print(f" Min: {min_dist:.1f} Avg: {avg_dist:.1f} Max: {max_dist:.1f}")
    print(f" (Higher = more specific. Threshold for 'generic' = {GENERIC_DISTANCE_THRESHOLD})")

    # Verdict
    print(f"\n--- Verdict ---")
    if pct_specific >= 80:
        print(f" The model returns SPECIFIC hex colors ({pct_specific:.0f}% distinct shades).")
        print(f" It is capable of estimating precise colors from images.")
    elif pct_specific >= 40:
        print(f" MIXED results ({pct_specific:.0f}% specific).")
        print(f" The model sometimes returns specific shades but often falls back to primaries.")
    else:
        print(f" The model mostly returns GENERIC primary colors ({pct_specific:.0f}% specific).")
        print(f" Hex codes are largely just the standard primary color equivalents.")