Skip to content

ci: bump Linux toolchain to GCC 16 (#379) #680

ci: bump Linux toolchain to GCC 16 (#379)

ci: bump Linux toolchain to GCC 16 (#379) #680

Workflow file for this run

# Instrumented build + test run whose lcov report is uploaded to Codecov.
name: Code Coverage

# Triggered by pushes to master, pull requests targeting master, and
# manual dispatches.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master
  workflow_dispatch: {}

# Pull-request runs share one group per ref, so a newer push cancels the
# run it supersedes. Every other event keys the group on the unique run
# id and never cancels, so those runs always get to finish.
concurrency:
  group: "${{ github.workflow }}-${{ github.event_name == 'pull_request' && github.ref || github.run_id }}"
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  coverage:
    runs-on: ubuntu-latest
    container:
      image: kthnode/gcc16-ubuntu24.04
    # Whole-job ceiling. Expected durations:
    #   - hot ccache (86 %+ hits): ~25–30 min in total — build
    #     ~10–15 min, tests ~7 min, report + upload ~5 min;
    #   - genuinely cold run (fresh cache version bump, key prefix
    #     change, or the first run after a compiler upgrade in the
    #     container image): the build alone takes ~80 min.
    # The cap has to clear the cold case; 130 leaves margin without
    # letting a wedged job burn the full 6-hour GHA default.
    timeout-minutes: 130
    # This job is advisory: a failure here must not block the PR's
    # required checks. On the hosted runner (7 GB) instrumented builds
    # consistently run out of memory at this project's current TU
    # count. gcovr 8.6, gcovr 8.5, serial lcov, and `lcov --parallel`
    # with a narrowed scope all end with the same "runner lost
    # communication" signature, whatever the report tool's memory
    # profile. Until coverage moves to a runner with more headroom (or
    # the codebase shrinks), the job stays in place to capture data on
    # the runs that do squeeze through, without turning every PR red
    # on the runs that don't.
    #
    # Side-effect to keep in mind: `continue-on-error` also softens
    # the Codecov upload step's `fail_ci_if_error`. A misconfigured
    # token or a corrupt `coverage.info` now marks the upload step red
    # without failing the job, so monitor uploads on the Codecov
    # dashboard instead of relying on PR gates for them.
    continue-on-error: true
steps:
# Check out the repository together with its submodules. The two setup
# actions below are referenced by local path under ./ci_utils, so that
# directory must exist in the checkout before they run — presumably it
# is one of the submodules; TODO confirm.
- name: 📥 Checkout
  uses: actions/checkout@9c091bb21b7c1c1d1991bb908d89e4e9dddfe3e0  # v7.0.0
  with:
    submodules: true

# Toolchain bootstrap via the repository-local composite actions.
- name: 🔧 Setup Conan
  uses: ./ci_utils/.github/actions/setup-conan

- name: 🔧 Setup kthbuild
  uses: ./ci_utils/.github/actions/setup-kthbuild
- name: 🗄️ Setup ccache
  # No ccache environment overrides are set on purpose. This matches
  # the sister `build-with-container.yml` workflow's ccache config,
  # which on the same hosted runner hits 86 % against the same 1074
  # TUs. PR #309 added `CCACHE_BASEDIR=$GITHUB_WORKSPACE` +
  # `CCACHE_NOHASHDIR=1` here on the theory that coverage's `.gcno`
  # files embed the cwd and the relativisation would help. In practice
  # the inverse held: hit rate dropped to 0 / 1074 (every TU a miss,
  # even for files that hadn't changed in months). Probable interaction
  # with `--coverage`'s path bookkeeping in ccache 4.9.1; rather than
  # chase that, drop the two envs and rely on absolute paths the way
  # the other CI jobs do — Actions containers always check out at
  # `/__w/kth/kth`, so absolute paths are stable across runs anyway.
  run: |
    # The install itself stays best-effort: a flaky apt mirror must not
    # fail the job when the image already provides ccache.
    if ! { apt-get update -qq && apt-get install -y -qq ccache > /dev/null 2>&1; }; then
      echo "::warning::apt could not install ccache; continuing only if the image already provides it"
    fi
    # The build step calls `ccache --zero-stats` and uses ccache as the
    # compiler launcher, so a missing binary would fail the job there
    # with a bare "command not found". Fail here instead, with a
    # message that names the actual problem.
    if ! command -v ccache > /dev/null 2>&1; then
      echo "::error::ccache is not available; the coverage build cannot run without it"
      exit 1
    fi
    ccache --version
# Restore and save are two separate steps so that a flaky GH Cache
# Service cannot fail the whole job from a post-action hook. Recent
# runs lost the runner at that point for 18+ min while the save POSTs
# to `cache-service` timed out: build + tests had already passed, yet
# the job ended up red. The matching save step carries
# `continue-on-error: true`, which turns a cache-side failure into a
# non-fatal warning rather than a job failure.
- id: restore-ccache
  name: 🗄️ Restore ccache
  uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
  with:
    # The primary key is unique per commit; `restore-keys` lets the
    # step fall back to an earlier cache sharing the prefix.
    key: ccache-coverage-${{ github.sha }}
    restore-keys: ccache-coverage-
    path: /github/home/.cache/ccache
- name: 📦 Install dependencies
  # Installs the Conan dependencies for conanfile.py into ./build,
  # building from source any package without a prebuilt binary. The
  # `linux-ci-cd` profile is used for both the build and host contexts.
  # NOTE(review): `march_id=ZLm9Pjh` is an opaque project-specific
  # option value; its meaning is not visible from this workflow.
  run: |
    conan install conanfile.py \
      --version 0.0.0 \
      -of build \
      --build=missing \
      -pr:b linux-ci-cd \
      -pr:h linux-ci-cd \
      -o march_id=ZLm9Pjh \
      -o tests=True
- name: 🔨 Build with coverage
  run: |
    export PATH="/usr/lib/ccache:$PATH"
    # Reset the counters so the stats printed after the build describe
    # this run only.
    ccache --zero-stats
    # The coverage build is forced to -O0. PR #309 dropped this to
    # "cut test runtime by ~5×", but with -O3 + --coverage gcc 15
    # emits much larger .gcno/.gcda files (every inlined copy gets its
    # own block bookkeeping), and gcovr 8.6 then hangs for 45+ min on
    # report generation until the GHA runner kills the step. The last
    # green coverage run on master (24851108475, 2026-04-23) was on
    # -O0 with gcovr finishing in ~4 min. Slower tests are the right
    # trade — this job's purpose is the report, not benchmarking.
    #
    # NOTE(review): with CMAKE_BUILD_TYPE=Release, CMake normally
    # appends CMAKE_<LANG>_FLAGS_RELEASE (default `-O3 -DNDEBUG`)
    # after these flags, and GCC honours the last -O it sees. Confirm
    # on a real compile line that -O0 is what actually takes effect.
    coverage_flags="-O0 --coverage"
    cmake --preset conan-release \
      -DCMAKE_VERBOSE_MAKEFILE=OFF \
      -DGLOBAL_BUILD=ON \
      -DENABLE_TEST=ON \
      -DCMAKE_BUILD_TYPE=Release \
      -DCMAKE_C_FLAGS="$coverage_flags" \
      -DCMAKE_CXX_FLAGS="$coverage_flags" \
      -DCMAKE_EXE_LINKER_FLAGS="--coverage" \
      -DCMAKE_C_COMPILER_LAUNCHER=ccache \
      -DCMAKE_CXX_COMPILER_LAUNCHER=ccache
    cmake --build --preset conan-release -j"$(nproc)"
    ccache --show-stats
# The cache is persisted right after the build: compiling is what
# populates ccache, so saving at this point pays off on every run,
# including those where the tests or the report fail later on.
# `continue-on-error` keeps a flaky GH Cache Service save from failing
# the whole job; such a failure shows up as a non-fatal warning.
- name: 🗄️ Save ccache
  # Skipped only when the restore step reported an exact key hit, i.e.
  # a cache for this very commit already exists.
  if: ${{ always() && steps.restore-ccache.outputs.cache-hit != 'true' }}
  continue-on-error: true
  uses: actions/cache/save@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
  with:
    key: ccache-coverage-${{ github.sha }}
    path: /github/home/.cache/ccache
- name: 🧪 Run tests
  # Runs the `conan-release` test preset with one worker per available
  # core; output is printed only for tests that fail.
  run: |
    ctest --preset conan-release --output-on-failure --parallel "$(nproc)"
- name: 📊 Generate coverage report
  # Run even when an earlier step (typically the tests) failed, so a
  # report is still produced — but not when the run was cancelled.
  # `always()` would also fire on cancellation, and PR runs are
  # cancelled whenever a newer push supersedes them; a cancelled run
  # would then still install lcov and start capturing from a half-built
  # tree instead of releasing the runner.
  if: ${{ !cancelled() }}
  timeout-minutes: 20
  run: |
    # Switched off gcovr to lcov. gcovr 8.5 and 8.6 both blow
    # past the GHA hosted runner's memory ceiling on this
    # project's TU count: the in-memory coverage tree gcovr
    # builds (every translation unit's parsed source +
    # decision/branch records) is held entirely in resident
    # memory before any output writer runs, and the OS kills
    # the runner with "lost communication with the server"
    # before the 20-min step timeout fires. Confirmed
    # empirically — both versions hung past the timeout on
    # this branch's coverage runs.
    #
    # lcov streams .gcda/.gcno through gcov subprocesses, so
    # peak memory is bounded by the per-process residency
    # rather than by the whole project tree. The first lcov
    # attempt on this branch still ran past the 20-min
    # timeout, but lcov was running gcov serially on every TU
    # under build/, including the ~50% of subdirs (c-api,
    # database, network, node, …) we filter out at extract
    # time anyway. The configuration below narrows the
    # capture scope to just the sub-projects we keep,
    # invokes gcov in parallel ($(nproc) workers), drops
    # branch coverage (smaller intermediate files, faster
    # parse), and keeps geninfo_unexecuted_blocks so
    # unreached functions still show as 0%. Codecov accepts
    # `coverage.info` natively, so the upload contract is
    # unchanged.
    #
    # The default `bash -e` shell does not enable pipefail; turn it on
    # so a failing lcov in the final pipeline is not masked by `tail`.
    set -o pipefail
    apt-get update -qq && apt-get install -y -qq lcov
    lcov --version
    # Pre-check the build subtrees exist before invoking lcov.
    # If a future refactor renames a sub-project, lcov would
    # warn-and-continue with a partial `coverage.info` that
    # silently shrinks the reported surface — and since this
    # job is advisory, the regression could go unnoticed.
    for d in domain infrastructure blockchain; do
      test -d "build/build/Release/src/$d" || {
        echo "::error::missing coverage source dir: build/build/Release/src/$d";
        exit 1;
      }
    done
    # `--ignore-errors mismatch` keeps lcov 2.0 from aborting
    # the whole capture when geninfo hits a "mismatched end
    # line" between gcov output and source — known
    # lcov-2.0-on-GCC-15 quirk, harmless for our usage.
    # `branch_coverage=0` is the lcov-2.x spelling of the
    # legacy `lcov_branch_coverage` option (still accepted,
    # but emits a deprecation warning per invocation).
    lcov --capture \
      --directory build/build/Release/src/domain \
      --directory build/build/Release/src/infrastructure \
      --directory build/build/Release/src/blockchain \
      --base-directory . \
      --no-external \
      --parallel $(nproc) \
      --ignore-errors mismatch \
      --rc branch_coverage=0 \
      --rc geninfo_unexecuted_blocks=1 \
      --output-file coverage.info
    # Test sources still need pruning: they live alongside
    # the headers we keep, so `--no-external` and the
    # narrowed `--directory` set don't filter them.
    lcov --remove coverage.info \
      '*/test/*' '*/tests/*' \
      --ignore-errors mismatch \
      --rc branch_coverage=0 \
      --output-file coverage.info
    # Print only the last line of the listing (the overall total).
    lcov --list coverage.info | tail -1
- name: ☁️ Upload to Codecov
  # Upload even when an earlier step failed, but not when the run was
  # cancelled: `always()` would also fire on cancellation and make a
  # superseded PR run attempt an upload it has no use for.
  if: ${{ !cancelled() }}
  uses: codecov/codecov-action@0fb7174895f61a3b6b78fc075e0cd60383518dac  # v5.5.5
  with:
    # Upload exactly the lcov tracefile written by the report step;
    # the action's own file discovery is switched off.
    files: coverage.info
    disable_search: true
    # Upload errors fail the step only when a Codecov token is
    # configured; with an empty secret they are tolerated.
    fail_ci_if_error: ${{ secrets.CODECOV_TOKEN != '' }}
    token: ${{ secrets.CODECOV_TOKEN }}