Files
jersey_test/run_all_accuracy_tests.sh
Rick McEwen 5405d7f7dc Add accuracy test framework, prompts, results, and analysis reports
Includes accuracy test scripts for Qwen (local) and Gemini (cloud API),
three prompt variants (original, capstone, constrained), test results
from all runs, and two analysis reports with an HTML presentation version.
2026-03-03 18:44:49 -07:00

45 lines
1.5 KiB
Bash
Executable File

#!/usr/bin/env bash
#
# Run both accuracy test scripts against all three prompts.
# Results are saved to accuracy_test_results_all.txt (next to this script);
# the file is truncated at the start of every invocation.
#
# Usage: ./run_all_accuracy_tests.sh
#
# Exit status: 0 when every test run succeeded, 1 when at least one failed.
# A failing run does not stop the remaining runs: each model/prompt
# combination is independent, so the rest of the matrix is still collected.
#
# -e / -u: abort on unexpected errors and on unset variables.
# pipefail: without it, "python3 ... | tee" reports tee's status and a
# crashing test script would go unnoticed.
set -euo pipefail

SCRIPT_DIR="$(cd -- "$(dirname -- "$0")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT_FILE="${SCRIPT_DIR}/accuracy_test_results_all.txt"
readonly SEPARATOR="========================================"

PROMPTS=(
  "jersey_prompt.txt"
  "jersey_prompt_capstone.txt"
  "jersey_prompt_constrained.txt"
)
readonly PROMPTS

# Number of test runs whose pipeline exited non-zero.
failures=0

#######################################
# Print one line to stdout and append it to the results file.
# Globals:   OUTPUT_FILE (read)
# Arguments: $* - text of the line (may be empty for a blank line)
# Outputs:   the line on stdout and in OUTPUT_FILE
#######################################
log() {
  printf '%s\n' "$*" | tee -a "$OUTPUT_FILE"
}

#######################################
# Run one accuracy test script against one prompt, with a banner.
# Globals:   SCRIPT_DIR, OUTPUT_FILE, SEPARATOR (read); failures (written)
# Arguments: $1 - label shown in the banner
#            $2 - test script file name, relative to SCRIPT_DIR
#            $3 - prompt file name, relative to SCRIPT_DIR
# Outputs:   banner and the test's stdout+stderr, to stdout and OUTPUT_FILE
# Returns:   0 always; failures are counted instead of propagated
#######################################
run_test() {
  local label=$1
  local test_script=$2
  local prompt=$3
  local status=0

  log "$SEPARATOR"
  log "${label} + ${prompt}"
  log "Started: $(date)"
  log "$SEPARATOR"

  # "|| status=$?" keeps set -e from aborting the whole matrix on one failure
  # while still capturing the pipeline's (pipefail) exit status.
  python3 "${SCRIPT_DIR}/${test_script}" "${SCRIPT_DIR}/${prompt}" 2>&1 \
    | tee -a "$OUTPUT_FILE" || status=$?

  if (( status != 0 )); then
    log "FAILED (exit status ${status}): ${label} + ${prompt}"
    failures=$((failures + 1))
  fi
  log ""
}

printf '%s\n' "Results will be saved to: ${OUTPUT_FILE}"
printf '%s\n' "Started at: $(date)"
printf '\n'

# Start from an empty results file.
: > "$OUTPUT_FILE"

for prompt in "${PROMPTS[@]}"; do
  run_test "Qwen3-VL-8B" "test_accuracy.py" "$prompt"
  run_test "Gemini 3 Flash" "test_accuracy_gemini.py" "$prompt"
done

log "$SEPARATOR"
log "All tests completed at: $(date)"
printf '%s\n' "Results saved to: ${OUTPUT_FILE}"

if (( failures > 0 )); then
  printf '%d test run(s) failed; see %s\n' "$failures" "$OUTPUT_FILE" >&2
  exit 1
fi