Add accuracy test framework, prompts, results, and analysis reports

Includes accuracy test scripts for Qwen (local) and Gemini (cloud API),
three prompt variants (original, capstone, constrained), test results
from all runs, and two analysis reports with an HTML presentation version.
This commit is contained in:
2026-03-03 18:44:49 -07:00
parent 435033ea07
commit 5405d7f7dc
13 changed files with 8561 additions and 0 deletions

44
run_all_accuracy_tests.sh Executable file
View File

@ -0,0 +1,44 @@
#!/usr/bin/env bash
#
# Run both accuracy test scripts against all three prompts.
# Results are saved to accuracy_test_results_all.txt
#
# Usage: ./run_all_accuracy_tests.sh
#
# For every prompt file listed in PROMPTS, this runs test_accuracy.py and then
# test_accuracy_gemini.py (both looked up next to this script), passing the
# prompt path as the only argument. Combined stdout/stderr of each run is
# shown on the terminal and appended to the results file.
#
# A failing run does not stop the remaining runs. Exit status is 0 when every
# run succeeded and 1 when at least one run failed.
#
# pipefail is required here: every run is piped through tee, and without it
# the pipeline status would be tee's, hiding any python3 failure.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT_FILE="${SCRIPT_DIR}/accuracy_test_results_all.txt"
readonly SEPARATOR="========================================"

PROMPTS=(
  "jersey_prompt.txt"
  "jersey_prompt_capstone.txt"
  "jersey_prompt_constrained.txt"
)

#######################################
# Run one test script against one prompt, logging a banner and all output.
# Globals:   SCRIPT_DIR, OUTPUT_FILE, SEPARATOR (read)
# Arguments: $1 - label printed in the banner
#            $2 - test script file name, relative to SCRIPT_DIR
#            $3 - prompt file name, relative to SCRIPT_DIR
# Outputs:   banner and test output to stdout and OUTPUT_FILE;
#            a FAILED line to stderr and OUTPUT_FILE on failure
# Returns:   0 if the run succeeded, otherwise the pipeline's non-zero status
#######################################
run_test() {
  local label=$1
  local script=$2
  local prompt=$3
  local status=0

  {
    echo "$SEPARATOR"
    echo "${label} + ${prompt}"
    echo "Started: $(date)"
    echo "$SEPARATOR"
  } | tee -a "$OUTPUT_FILE"

  # Capture the status instead of letting set -e abort, so later runs still
  # execute and the failure can be reported.
  python3 "${SCRIPT_DIR}/${script}" "${SCRIPT_DIR}/${prompt}" 2>&1 \
    | tee -a "$OUTPUT_FILE" || status=$?

  if (( status != 0 )); then
    echo "FAILED (exit status ${status}): ${label} + ${prompt}" \
      | tee -a "$OUTPUT_FILE" >&2
  fi
  echo "" | tee -a "$OUTPUT_FILE"

  return "$status"
}

echo "Results will be saved to: ${OUTPUT_FILE}"
echo "Started at: $(date)"
echo ""

# Start from an empty results file; everything below appends to it.
: > "$OUTPUT_FILE"

failures=0
for prompt in "${PROMPTS[@]}"; do
  run_test "Qwen3-VL-8B" "test_accuracy.py" "$prompt" \
    || failures=$((failures + 1))
  run_test "Gemini 3 Flash" "test_accuracy_gemini.py" "$prompt" \
    || failures=$((failures + 1))
done

{
  echo "$SEPARATOR"
  echo "All tests completed at: $(date)"
} | tee -a "$OUTPUT_FILE"
echo "Results saved to: ${OUTPUT_FILE}"

if (( failures > 0 )); then
  echo "${failures} test run(s) failed; see ${OUTPUT_FILE}" >&2
  exit 1
fi