3
0
mirror of https://github.com/Qortal/Brooklyn.git synced 2025-02-12 02:05:54 +00:00
Raziel K. Crowe 1c4c363d5c VC4Stdlib
2022-09-09 19:57:08 +05:00

47 lines
1.4 KiB
Common Lisp

#define arg_t float16
#define result_t float16
#define int_t int16
#define uint_t uint16
#define CONCAT(a, b) a##b
#define CAT(a, b) CONCAT(a, b)
float16 vc4cl_lossy(ulong16) __attribute__((overloadable));
ulong16 vc4cl_add(ulong16, ulong16) __attribute__((overloadable));
ulong16 vc4cl_sub(ulong16, ulong16) __attribute__((overloadable));
ulong16 vc4cl_mul(float16, float16) __attribute__((overloadable));
ulong16 vc4cl_mul(ulong16, ulong16) __attribute__((overloadable));
ulong16 vc4cl_extend(float16 val) __attribute__((overloadable));
result_t fma_simple(arg_t in0, arg_t in1, arg_t in2)
{
return in0 * in1 * in2;
}
__kernel void fma_simple_kernel(
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
{
uint gid = get_global_id(0);
out[gid] = fma_simple(in0[gid], in1[gid], in2[gid]);
}
result_t fma_extended_precision(arg_t in0, arg_t in1, arg_t in2)
{
ulong16 mul = vc4cl_mul(in0, in1);
return vc4cl_lossy(vc4cl_add(mul, vc4cl_extend(in2)));
}
__kernel void fma_extended_precision_kernel(
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
{
uint gid = get_global_id(0);
out[gid] = fma_extended_precision(in0[gid], in1[gid], in2[gid]);
}
__kernel void fma_builtin_kernel(
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
{
uint gid = get_global_id(0);
out[gid] = fma(in0[gid], in1[gid], in2[gid]);
}