ci(publish): fix Soda PyPI --skip-existing + single approval gate #2473
Workflow file for this run
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| --- | |
| name: CI pipeline | |
| on: | |
| workflow_dispatch: | |
| inputs: | |
| dataSource: | |
| description: Run tests for this Soda data source | |
| type: choice | |
| default: all | |
| options: | |
| - all | |
| - athena | |
| - bigquery | |
| - databricks | |
| - duckdb | |
| - fabric | |
| - postgres | |
| - redshift | |
| - snowflake | |
| - sqlserver | |
| - synapse | |
| - sparkdf | |
| - trino-postgres | |
| - trino-s3 | |
| includeNightlyTests: | |
| description: Include nightly-only tests (slow / rarely changing) | |
| type: boolean | |
| default: false | |
| push: | |
| branches: | |
| - 'main' | |
| # NOTE: Tag-triggered production releases are handled by release.yaml. | |
| # This workflow only runs CI + dev-pypi publishes for main branch pushes. | |
| pull_request: | |
| env: | |
| PROJECT_NAME: soda-core | |
| jobs: | |
| check: | |
| name: pre-commit & lockfile | |
| runs-on: ubuntu-24.04 | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Set up Python 3.10 | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.10" | |
| - name: Set up UV | |
| uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5 | |
| - name: Check lockfile is up-to-date | |
| run: uv lock --check | |
| - name: Run pre-commit | |
| uses: pre-commit/action@v3.0.1 | |
| detect-skip-tests: | |
| name: detect [SKIP-TESTS] | |
| runs-on: ubuntu-24.04 | |
| outputs: | |
| skip: ${{ steps.check.outputs.skip }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Check commit message for [SKIP-TESTS] prefix | |
| id: check | |
| run: | | |
| if [ "${{ github.event_name }}" = "pull_request" ]; then | |
| git fetch origin "${{ github.event.pull_request.head.sha }}" --depth=1 | |
| COMMIT_MSG=$(git log -1 --pretty=%s FETCH_HEAD) | |
| else | |
| COMMIT_MSG=$(git log -1 --pretty=%s) | |
| fi | |
| if [[ "$COMMIT_MSG" == "[SKIP-TESTS]"* ]]; then | |
| echo "skip=true" >> "$GITHUB_OUTPUT" | |
| echo "Commit message starts with [SKIP-TESTS] — tests will be skipped" | |
| else | |
| echo "skip=false" >> "$GITHUB_OUTPUT" | |
| fi | |
| define-test-matrix: | |
| if: needs.detect-skip-tests.outputs.skip != 'true' | |
| runs-on: ubuntu-24.04 | |
| needs: [check, detect-skip-tests] | |
| outputs: | |
| modules: ${{ steps.modules.outputs.modules }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Set data source (workflow_dispatch) | |
| if: ${{ github.event_name == 'workflow_dispatch' }} | |
| run: | | |
| if [ -z "${{ inputs.dataSource }}" ] || [ "${{ inputs.dataSource }}" = "all" ]; then | |
| echo "DATA_SOURCE=all" >> "${GITHUB_ENV}" | |
| else | |
| echo "DATA_SOURCE=${{ inputs.dataSource }}" >> "${GITHUB_ENV}" | |
| fi | |
| - name: Set data source (push) | |
| if: ${{ github.event_name != 'workflow_dispatch' }} | |
| run: | | |
| echo "DATA_SOURCE=all" >> "${GITHUB_ENV}" | |
| - name: Define modules | |
| id: modules | |
| run: | | |
| echo "INFO: DATA_SOURCE is set to ${DATA_SOURCE}" | |
| if [ "${DATA_SOURCE}" = "all" ]; then | |
| echo modules=$(bash scripts/test_matrix.sh) >> "$GITHUB_OUTPUT" | |
| else | |
| echo 'modules=["__DATA_SOURCE__"]' | sed "s|__DATA_SOURCE__|${DATA_SOURCE}|g" >> "$GITHUB_OUTPUT" | |
| fi | |
| test: | |
| runs-on: ubuntu-24.04 | |
| needs: [define-test-matrix] | |
| services: | |
| postgres: | |
| # please keep the postgres version in sync with the one in docker-compose.yml for postgres | |
| # Also used by trino-postgres (Trino's PostgreSQL catalog connector targets this instance) | |
| image: ${{ ( matrix.module == 'postgres' || startsWith(matrix.module, 'trino') ) && 'postgres:15.10-alpine3.21' || '' }} | |
| env: | |
| POSTGRES_USER: soda_test | |
| POSTGRES_DB: soda_test | |
| POSTGRES_HOST_AUTH_METHOD: trust | |
| options: >- | |
| --health-cmd pg_isready | |
| --health-interval 10s | |
| --health-timeout 5s | |
| --health-retries 5 | |
| --health-start-period 10s | |
| ports: | |
| - 5432:5432 | |
| sqlserver: | |
| image: ${{ ( matrix.module == 'sqlserver' ) && 'mcr.microsoft.com/mssql/server:2022-latest' || '' }} | |
| env: | |
| ACCEPT_EULA: Y | |
| SA_PASSWORD: Password1! | |
| ports: | |
| - 1433:1433 | |
| options: >- | |
| --health-cmd "/opt/mssql-tools18/bin/sqlcmd -S localhost -U sa -P Password1! -Q 'select 1' -C -b -o /dev/null" | |
| --health-interval 1s | |
| --health-timeout 2s | |
| --health-retries 10 | |
| --health-start-period 10s | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| python-version: ${{ github.event_name == 'pull_request' && fromJSON('["3.14"]') || fromJSON('["3.10", "3.11", "3.12", "3.13", "3.14"]') }} | |
| module: ${{ fromJSON(needs.define-test-matrix.outputs.modules) }} | |
| env: | |
| SNOWFLAKE_ACCOUNT: ${{ vars.SNOWFLAKE_CI_ACCOUNT }} | |
| SNOWFLAKE_USER: ${{ vars.SNOWFLAKE_CI_USERNAME }} | |
| SNOWFLAKE_DATABASE: ${{ vars.SNOWFLAKE_CI_DATABASE }} | |
| DATABRICKS_HOST: ${{ vars.DATABRICKS_CI_HOST }} | |
| DATABRICKS_HTTP_PATH: ${{ vars.DATABRICKS_CI_HTTP_PATH }} | |
| DATABRICKS_CATALOG: ${{ vars.DATABRICKS_CI_CATALOG }} | |
| REDSHIFT_HOST: ${{ vars.REDSHIFT_CI_HOST }} | |
| REDSHIFT_USERNAME: ${{ vars.REDSHIFT_CI_USERNAME }} | |
| REDSHIFT_DATABASE: "soda_test" | |
| REDSHIFT_PORT: "5439" | |
| ATHENA_S3_TEST_DIR: ${{ vars.ATHENA_CI_STAGING_DIR }} | |
| ATHENA_SCHEMA: ${{ vars.ATHENA_CI_SCHEMA }} | |
| ATHENA_WORKGROUP: ${{ vars.ATHENA_CI_WORKGROUP }} | |
| SQLSERVER_USERNAME: sa | |
| SQLSERVER_PASSWORD: Password1! | |
| SQLSERVER_DATABASE: master | |
| SQLSERVER_SCHEMA: dbo | |
| SYNAPSE_HOST: ${{ vars.MICROSOFT_SYNAPSE_CI_HOST }} | |
| SYNAPSE_DATABASE: sodacisynapse | |
| SYNAPSE_AUTHENTICATION_TYPE: activedirectoryserviceprincipal | |
| FABRIC_HOST: ${{ vars.MICROSOFT_FABRIC_CI_HOST }} | |
| FABRIC_DATABASE: soda-ci-fabric-warehouse | |
| FABRIC_AUTHENTICATION_TYPE: activedirectoryserviceprincipal | |
| SODA_CORE_TELEMETRY_LOCAL_TEST_MODE: "true" | |
| SODA_NIGHTLY: ${{ inputs.includeNightlyTests }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Resolve module | |
| run: | | |
| MODULE="${{ matrix.module }}" | |
| case "$MODULE" in | |
| trino-postgres) echo "TEST_MODULE=trino" >> "$GITHUB_ENV"; echo "TRINO_CATALOG=db" >> "$GITHUB_ENV" ;; | |
| trino-s3) echo "TEST_MODULE=trino" >> "$GITHUB_ENV"; echo "TRINO_CATALOG=iceberg" >> "$GITHUB_ENV" ;; | |
| *) echo "TEST_MODULE=$MODULE" >> "$GITHUB_ENV" ;; | |
| esac | |
| echo "CACHE_KEY=$MODULE" >> "$GITHUB_ENV" | |
| - name: Set up Python ${{ matrix.python-version }} | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: ${{ matrix.python-version }} | |
| - name: Set up UV | |
| uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5 | |
| - name: Install dependencies | |
| run: | | |
| curl https://packages.microsoft.com/keys/microsoft.asc | sudo apt-key add - | |
| curl https://packages.microsoft.com/config/ubuntu/21.04/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list > /dev/null | |
| sudo apt-get update | |
| ACCEPT_EULA=Y sudo apt-get install -y libsasl2-dev msodbcsql18 | |
| - name: Get external secrets | |
| uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # v2.0.10 | |
| env: | |
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BUILD_ACCESS_KEY_ID }} | |
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BUILD_SECRET_ACCESS_KEY }} | |
| AWS_REGION: ${{ secrets.AWS_BUILD_DEFAULT_REGION }} | |
| with: | |
| secret-ids: | | |
| ,/soda/github/common/data-sources/envs | |
| parse-json-secrets: true | |
| - name: Get external secrets (special) | |
| uses: aws-actions/aws-secretsmanager-get-secrets@a9a7eb4e2f2871d30dc5b892576fde60a2ecc802 # v2.0.10 | |
| env: | |
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BUILD_ACCESS_KEY_ID }} | |
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BUILD_SECRET_ACCESS_KEY }} | |
| AWS_REGION: ${{ secrets.AWS_BUILD_DEFAULT_REGION }} | |
| with: | |
| secret-ids: | | |
| BIGQUERY_ACCOUNT_INFO_JSON,/soda/github/common/data-sources/BIGQUERY_ACCOUNT_INFO_JSON | |
| - name: Configure Trino | |
| if: startsWith(matrix.module, 'trino') | |
| env: | |
| AWS_ACCESS_KEY_ID: ${{ secrets.AWS_BUILD_ACCESS_KEY_ID }} | |
| AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_BUILD_SECRET_ACCESS_KEY }} | |
| AWS_REGION: ${{ secrets.AWS_BUILD_DEFAULT_REGION }} | |
| TRINO_CI_ASSUME_ROLE_ARN: ${{ secrets.TRINO_CI_ASSUME_ROLE_ARN }} | |
| CI_MODULE: ${{ matrix.module }} | |
| run: bash scripts/start_trino_ci.sh | |
| # ── Restore nightly snapshots for PR replay ───────────────────── | |
| - name: Restore snapshot cache | |
| if: github.event_name == 'pull_request' | |
| id: snapshot-cache | |
| uses: actions/cache/restore@v4 | |
| with: | |
| path: .test_snapshots/ | |
| key: snapshots-${{ env.CACHE_KEY }}- | |
| restore-keys: | | |
| snapshots-${{ env.CACHE_KEY }}- | |
| - name: Check for replay override | |
| run: | | |
| if [ "${{ github.event_name }}" = "pull_request" ]; then | |
| git fetch origin "${{ github.event.pull_request.head.sha }}" --depth=1 | |
| COMMIT_MSG=$(git log -1 --pretty=%s FETCH_HEAD) | |
| else | |
| COMMIT_MSG=$(git log -1 --pretty=%s) | |
| fi | |
| if [[ "$COMMIT_MSG" == "[REPLAY=OFF]"* ]]; then | |
| echo "FORCE_SNAPSHOT_OFF=true" >> "$GITHUB_ENV" | |
| echo "Commit message starts with [REPLAY=OFF] — forcing snapshot mode off" | |
| fi | |
| - name: Determine snapshot mode | |
| run: | | |
| if [ "$FORCE_SNAPSHOT_OFF" = "true" ]; then | |
| echo "SODA_TEST_SNAPSHOT=off" >> "$GITHUB_ENV" | |
| echo "Snapshot replay disabled via [REPLAY=OFF] in commit message" | |
| # duckdb and sparkdf run fast enough without snapshots — always use real DB | |
| elif [ "${{ env.TEST_MODULE }}" = "duckdb" ] || [ "${{ env.TEST_MODULE }}" = "sparkdf" ]; then | |
| echo "SODA_TEST_SNAPSHOT=off" >> "$GITHUB_ENV" | |
| echo "Running ${{ env.TEST_MODULE }} tests against real DB (no snapshots)" | |
| elif [ "${{ github.event_name }}" = "pull_request" ] && [ -d ".test_snapshots" ]; then | |
| echo "SODA_TEST_SNAPSHOT=replay" >> "$GITHUB_ENV" | |
| echo "SODA_TEST_SNAPSHOT_FALLBACK=true" >> "$GITHUB_ENV" | |
| echo "Using snapshot replay with fallback for PR" | |
| else | |
| echo "SODA_TEST_SNAPSHOT=off" >> "$GITHUB_ENV" | |
| echo "Running tests against real databases" | |
| fi | |
| - name: Run tests | |
| run: | | |
| uv sync --locked --all-packages --group dev | |
| if [ "${{ matrix.module }}" = "sparkdf" ]; then | |
| sudo apt-get update | |
| sudo apt-get install -y openjdk-17-jdk | |
| export JAVA_HOME=/usr/lib/jvm/java-17-openjdk-amd64 | |
| export PATH=$JAVA_HOME/bin:$PATH | |
| echo "JAVA_HOME=$JAVA_HOME" >> "$GITHUB_ENV" | |
| echo "PATH=$PATH" >> "$GITHUB_ENV" | |
| fi | |
| export TEST_DATASOURCE=${{ env.TEST_MODULE }} | |
| uv run python -m pytest -ra soda-tests/tests/integration | |
| if [ "${{ env.TEST_MODULE }}" = "postgres" ]; then | |
| uv run python -m pytest -ra soda-tests/tests/unit | |
| uv run python -m pytest -ra soda-tests/tests/feature | |
| fi | |
| uv run python -m pytest -ra soda-${{ env.TEST_MODULE }}/tests | |
| if [ "${{ env.TEST_MODULE }}" = "databricks" ]; then | |
| # hive_metastore always runs without snapshots for now | |
| export SODA_TEST_SNAPSHOT=off | |
| unset SODA_TEST_SNAPSHOT_FALLBACK | |
| export DATABRICKS_CATALOG=hive_metastore | |
| echo "Changed DATABRICKS_CATALOG environment variable to hive_metastore" | |
| uv run python -m pytest -ra soda-tests/tests/integration | |
| fi | |
| # ── Run no_snapshot tests against real DB ─────────────────────── | |
| - name: Run no_snapshot tests | |
| if: env.SODA_TEST_SNAPSHOT == 'off' || env.SODA_TEST_SNAPSHOT == 'replay' | |
| env: | |
| SODA_TEST_SNAPSHOT: "off" | |
| TEST_DATASOURCE: ${{ env.TEST_MODULE }} | |
| run: | | |
| # Run tests marked no_snapshot against the real DB; exit 0 if none found. | |
| uv run python -m pytest -ra soda-tests/tests/integration -m "no_snapshot" \ | |
| --no-header -q || { | |
| rc=$? | |
| [ $rc -eq 5 ] && exit 0 | |
| exit $rc | |
| } | |
| - name: Dump Trino logs | |
| if: always() && startsWith(matrix.module, 'trino') | |
| run: | | |
| echo "=== Trino container logs ===" | |
| docker logs trino-ci 2>&1 | tail -100 | |
| - name: Stop Trino container | |
| if: always() && startsWith(matrix.module, 'trino') | |
| run: docker rm -f trino-ci 2>/dev/null || true | |
| define-matrix: | |
| if: | | |
| always() && !cancelled() && github.ref_name == 'main' | |
| && (needs.test.result == 'success' || needs.test.result == 'skipped') | |
| runs-on: ubuntu-24.04 | |
| needs: [test] | |
| outputs: | |
| modules: ${{ steps.modules.outputs.modules }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Define modules | |
| id: modules | |
| run: | | |
| echo modules=$(bash scripts/release_matrix.sh) >> "$GITHUB_OUTPUT" | |
| release-to-dev-pypi: | |
| # Mirror define-matrix's gating: without always() + an explicit | |
| # needs.<job>.result check, GitHub Actions' default success() evaluates | |
| # the *transitive* needs graph — so a [SKIP-TESTS] commit that skips | |
| # `test` also skips this job, defeating the SKIP-TESTS contract | |
| # (dev-pypi publish must still run on main). | |
| if: | | |
| always() && !cancelled() && github.ref_name == 'main' | |
| && needs.define-matrix.result == 'success' | |
| runs-on: ubuntu-24.04 | |
| needs: [define-matrix] | |
| strategy: | |
| fail-fast: false | |
| matrix: | |
| module: ${{ fromJSON(needs.define-matrix.outputs.modules) }} | |
| steps: | |
| - uses: actions/checkout@v4 | |
| - name: Set up Python 3.10 | |
| uses: actions/setup-python@v5 | |
| with: | |
| python-version: "3.10" | |
| - name: Set up UV | |
| uses: astral-sh/setup-uv@e58605a9b6da7c637471fab8847a5e5a6b8df081 # v5 | |
| - name: Debug GITHUB_REF | |
| run: echo "GITHUB_REF=$GITHUB_REF" | |
| - name: Release ${{ matrix.module }} | |
| run: | | |
| uv venv .venv | |
| source .venv/bin/activate | |
| uv pip install tbump build twine | |
| if [ "${GITHUB_REF#refs/tags/}" != "$GITHUB_REF" ]; then | |
| VERSION="${GITHUB_REF#refs/tags/}" | |
| echo "Using tag version: $VERSION" | |
| else | |
| CURRENT_VERSION_ROOT="$(tbump current-version | sed -E 's/((a|b|rc)[0-9]+|\.dev[0-9]+)$//')" | |
| echo "No tag found, bumping to dev version: $CURRENT_VERSION_ROOT" | |
| tbump --only-patch --non-interactive ${CURRENT_VERSION_ROOT}.dev${GITHUB_RUN_NUMBER} | |
| fi | |
| cd ${{ matrix.module }} | |
| python3 -m build | |
| # Stagger uploads so many packages don't hit PyPI at once (reduces 503s) | |
| - name: Stagger uploads | |
| run: | | |
| DELAY=$(echo -n "${{ matrix.module }}" | cksum | awk '{print $1 % 91}') | |
| echo "Staggering upload by ${DELAY}s..." | |
| sleep $DELAY | |
| - name: Publish package to pypi | |
| uses: pypa/gh-action-pypi-publish@ed0c53931b1dc9bd32cbe73a98c7f6766f8a527e # v1.13.0 | |
| with: | |
| packages-dir: ${{ matrix.module }}/dist | |
| user: ${{ secrets.DEV_PYPI_USERNAME }} | |
| password: ${{ secrets.DEV_PYPI_PASSWORD }} | |
| repository-url: ${{ secrets.DEV_PYPI_URL }} |