Skip to content
59 changes: 51 additions & 8 deletions dace/runtime/include/dace/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,17 +96,60 @@ namespace dace
typedef std::complex<float> complex64;
typedef std::complex<double> complex128;
struct half {
// source: https://stackoverflow.com/a/26779139/15853075
half() {}

// IEEE-754 binary32 -> binary16, round-to-nearest-even.
// Based on (https://gist.github.com/rygorous/2156668)
half(float f) {
union { float fval; uint32_t x; } u;
u.fval = f;
h = ((u.x>>16)&0x8000)|((((u.x&0x7f800000)-0x38000000)>>13)&0x7c00)|((u.x>>13)&0x03ff);
union { float f; uint32_t u; } in;
in.f = f;
uint32_t sign = in.u & 0x80000000u;
in.u ^= sign; // work on the magnitude

if (in.u >= 0x47800000u) {
// Inf or NaN (exponent overflows half range): NaN -> qNaN, else Inf.
h = (in.u > 0x7f800000u) ? 0x7e00 : 0x7c00;
} else if (in.u < 0x38800000u) {
// Subnormal half or zero. Adding this magic constant and relying on
// round-to-nearest-even FP addition aligns the mantissa correctly.
union { uint32_t u; float f; } magic;
magic.u = 0x3f000000u; // 126 << 23
in.f += magic.f;
h = (uint16_t)(in.u - magic.u);
} else {
// Normal half: rebias exponent (127 -> 15) and round mantissa to even.
uint32_t mant_odd = (in.u >> 13) & 1;
in.u += 0xc8000000u + 0xfffu; // ((15 - 127) << 23) + rounding bias
in.u += mant_odd;
h = (uint16_t)(in.u >> 13);
}
h |= (uint16_t)(sign >> 16);
}
operator float() {
union { float f; uint32_t tmp; } u;
u.tmp = ((h&0x8000)<<16) | (((h&0x7c00)+0x1C000)<<13) | ((h&0x03FF)<<13);
return u.f;

// IEEE-754 binary16 -> binary32 (exact; every binary16 fits in binary32).
operator float() const {
uint32_t sign = (uint32_t)(h & 0x8000) << 16;
uint32_t exp = (h >> 10) & 0x1f;
uint32_t mant = h & 0x3ff;
union { uint32_t u; float f; } out;
if (exp == 0) {
if (mant == 0) {
out.u = sign; // signed zero
} else {
// Subnormal half: normalize into a float normal.
exp = 1;
do { exp--; mant <<= 1; } while ((mant & 0x400) == 0);
mant &= 0x3ff;
out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13);
}
} else if (exp == 0x1f) {
out.u = sign | 0x7f800000u | (mant << 13); // Inf or NaN
} else {
out.u = sign | ((exp + (127 - 15)) << 23) | (mant << 13); // normal
}
return out.f;
}

uint16_t h;
};
typedef half float16;
Expand Down
87 changes: 87 additions & 0 deletions tests/half_cpu_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# Copyright 2019-2026 ETH Zurich and the DaCe authors. All rights reserved.

import dace
import numpy as np

N = dace.symbol("N")


def _roundtrip_program():
"""A program that stores float32 input as float16 and reads it back as float32.

This forces both conversion directions of the host ``dace::half``:
``half(float)`` on the store and ``operator float()`` on the load.
"""

@dace.program
def f32_to_f16_to_f32(inp: dace.float32[N], out: dace.float32[N]):
tmp = np.ndarray([N], dace.float16)
for i in dace.map[0:N]:
with dace.tasklet:
a << inp[i]
t >> tmp[i]
t = a # float32 -> float16
for i in dace.map[0:N]:
with dace.tasklet:
t << tmp[i]
o >> out[i]
o = t # float16 -> float32

return f32_to_f16_to_f32


def test_float16_cpu_roundtrip_matches_numpy():
"""Host float32<->float16 conversion must agree with IEEE-754 (NumPy float16).

Covers zero, exact-rounding, small/subnormal magnitudes, inf, and NaN.
"""
prog = _roundtrip_program()
inp = np.array(
[
0.0,
-0.0,
1.0,
-1.0,
2.0,
3.0,
0.5,
-0.5,
1.25,
-2.5,
100.0,
1024.0,
65504.0,
1e-3,
6e-5,
6e-8,
-6e-8,

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add a subnormal number to your tests?

Copy link
Copy Markdown
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added inf and nan tests

np.inf,
-np.inf,
np.nan,
],
dtype=np.float32,
)
out = np.full(inp.shape, np.nan, dtype=np.float32)
prog(inp=inp, out=out, N=inp.size)

# NumPy's float16 is IEEE-754 round-to-nearest-even -- the reference.
expected = inp.astype(np.float16).astype(np.float32)
np.testing.assert_array_equal(out, expected)


def test_float16_cpu_random_roundtrip_matches_numpy():
"""Randomized sweep of the host conversion against NumPy float16."""
rng = np.random.default_rng(0)
inp = (rng.standard_normal(4096) * 10.0).astype(np.float32)
out = np.full(inp.shape, np.nan, dtype=np.float32)

prog = _roundtrip_program()
prog(inp=inp, out=out, N=inp.size)

expected = inp.astype(np.float16).astype(np.float32)
np.testing.assert_array_equal(out, expected)


if __name__ == "__main__":
test_float16_cpu_roundtrip_matches_numpy()
test_float16_cpu_random_roundtrip_matches_numpy()