Includes accuracy test scripts for Qwen (local) and Gemini (cloud API), three prompt variants (original, capstone, constrained), test results from all runs, and two analysis reports with an HTML presentation version.
45 lines
1.5 KiB
Bash
Executable File
45 lines
1.5 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Run both accuracy test scripts against all three prompts.
# Results are saved to accuracy_test_results_all.txt
#
# Each (model, prompt) pair is run in turn. Its combined stdout/stderr is
# shown on the terminal and appended to the results file. A failing run does
# not stop the remaining runs, but it is recorded in the results file and
# makes this script exit with status 1 once every pair has been attempted.
#

# pipefail matters here: every test run is piped through tee, and without it
# the pipeline would report tee's status and hide a failing python3 process.
set -euo pipefail

SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
readonly SCRIPT_DIR
readonly OUTPUT_FILE="${SCRIPT_DIR}/accuracy_test_results_all.txt"
readonly BANNER="========================================"

# Prompt files, resolved relative to this script's directory.
readonly PROMPTS=(
  "jersey_prompt.txt"
  "jersey_prompt_capstone.txt"
  "jersey_prompt_constrained.txt"
)

# Human-readable "label + prompt" entries for every run that exited non-zero.
failures=()

#######################################
# Run one test script against one prompt, logging a banner and all output.
# Globals:   SCRIPT_DIR, OUTPUT_FILE, BANNER (read); failures (appended)
# Arguments: $1 - label printed in the banner
#            $2 - test script file name, relative to SCRIPT_DIR
#            $3 - prompt file name, relative to SCRIPT_DIR
# Outputs:   banner and test output to stdout and appended to OUTPUT_FILE
# Returns:   0 always; a failed run is recorded in `failures` instead of
#            aborting, so the remaining combinations still execute.
#######################################
run_suite() {
  local label=$1
  local script=$2
  local prompt=$3
  local status=0

  {
    echo "$BANNER"
    echo "${label} + ${prompt}"
    echo "Started: $(date)"
    echo "$BANNER"
  } | tee -a "$OUTPUT_FILE"

  # `|| status=$?` captures the pipeline status without tripping `set -e`.
  python3 "${SCRIPT_DIR}/${script}" "${SCRIPT_DIR}/${prompt}" 2>&1 \
    | tee -a "$OUTPUT_FILE" || status=$?

  if (( status != 0 )); then
    echo "FAILED (exit ${status}): ${label} + ${prompt}" | tee -a "$OUTPUT_FILE"
    failures+=("${label} + ${prompt}")
  fi

  echo "" | tee -a "$OUTPUT_FILE"
}

echo "Results will be saved to: ${OUTPUT_FILE}"
echo "Started at: $(date)"
echo ""

# Start from an empty results file; every later write appends.
: > "$OUTPUT_FILE"

for prompt in "${PROMPTS[@]}"; do
  run_suite "Qwen3-VL-8B" "test_accuracy.py" "$prompt"
  run_suite "Gemini 3 Flash" "test_accuracy_gemini.py" "$prompt"
done

echo "$BANNER" | tee -a "$OUTPUT_FILE"
echo "All tests completed at: $(date)" | tee -a "$OUTPUT_FILE"
echo "Results saved to: ${OUTPUT_FILE}"

# Surface failures through the exit status so callers (CI, wrappers) notice.
if (( ${#failures[@]} > 0 )); then
  printf '%d test run(s) failed:\n' "${#failures[@]}" >&2
  printf '  %s\n' "${failures[@]}" >&2
  exit 1
fi