Skip to content

chore(deps): bump the github-actions group across 1 directory with 6 updates #1698

chore(deps): bump the github-actions group across 1 directory with 6 updates

chore(deps): bump the github-actions group across 1 directory with 6 updates #1698

Workflow file for this run

# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT
# This workflow runs tests for the evaluation tool
# Triggers when eval source code or tests are modified
# Platform: Windows only
name: Test Evaluation Tool

# Events that start this workflow. The push and pull_request triggers use the
# same path filter, so only changes to the eval sources, eval data, its tests,
# setup.py, or this workflow file start a run.
on:
  # Allows other workflows to call this one as a reusable workflow.
  workflow_call:
  push:
    branches:
      - main
    paths:
      - 'src/gaia/eval/**'
      - 'eval/scenarios/**'
      - 'eval/corpus/**'
      - 'eval/prompts/**'
      - 'tests/test_eval.py'
      - 'setup.py'
      - '.github/workflows/test_eval.yml'
  pull_request:
    branches:
      - main
    # NOTE(review): `labeled` is not listed, so adding a label to a PR does
    # not start a run by itself - confirm this is intended.
    types:
      - opened
      - synchronize
      - reopened
      - ready_for_review
    paths:
      - 'src/gaia/eval/**'
      - 'eval/scenarios/**'
      - 'eval/corpus/**'
      - 'eval/prompts/**'
      - 'tests/test_eval.py'
      - 'setup.py'
      - '.github/workflows/test_eval.yml'
  merge_group:
  # Allows manual runs.
  workflow_dispatch:
# Runs are grouped by workflow name plus the PR head ref (or the pushed ref
# when there is no head ref); starting a new run in a group cancels the one
# still in progress.
concurrency:
  group: '${{ github.workflow }}-${{ github.head_ref || github.ref }}'
  cancel-in-progress: true
# Restrict the workflow token to read-only access to repository contents.
permissions:
  contents: read
jobs:
  # Installs the package on a Windows runner, runs the eval unit tests with
  # coverage, and smoke-tests the eval CLI help commands.
  test-eval-windows:
    name: Test Eval Tool (Windows)
    runs-on: windows-latest
    # Always run for non-PR events. For pull requests, skip drafts unless the
    # PR carries the 'ready_for_ci' label.
    if: >-
      github.event_name != 'pull_request' ||
      github.event.pull_request.draft == false ||
      contains(github.event.pull_request.labels.*.name, 'ready_for_ci')
    timeout-minutes: 10  # 2 minute target but allow some buffer
    steps:
      - name: Checkout code
        uses: actions/checkout@v7

      - name: Set up Python 3.12
        uses: actions/setup-python@v6
        with:
          python-version: '3.12'

      - name: Install uv
        shell: pwsh
        run: |
          irm https://astral.sh/uv/install.ps1 | iex
          echo "$env:USERPROFILE\.local\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append

      # In pwsh steps only the exit code of the LAST native command decides
      # the step result, so every earlier native command is checked
      # explicitly to fail fast instead of being silently masked.
      - name: Install dependencies
        shell: pwsh
        run: |
          uv pip install --system -e .
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          uv pip install --system pytest pytest-cov pytest-mock anthropic rich

      - name: Run eval unit tests
        shell: pwsh
        run: |
          pytest tests/test_eval.py -v --cov=src/gaia/eval --cov-report=term-missing

      - name: Test eval CLI commands
        shell: pwsh
        run: |
          # Test help command
          python -m gaia.cli eval --help
          # Without this check a failure above would be hidden whenever the
          # next command succeeds.
          if ($LASTEXITCODE -ne 0) { exit $LASTEXITCODE }
          # Test agent eval help
          python -m gaia.cli eval agent --help