mirror of
https://github.com/Qortal/Brooklyn.git
synced 2025-02-12 02:05:54 +00:00
47 lines
1.4 KiB
Common Lisp
47 lines
1.4 KiB
Common Lisp
#define arg_t float16
|
|
#define result_t float16
|
|
#define int_t int16
|
|
#define uint_t uint16
|
|
|
|
#define CONCAT(a, b) a##b
|
|
#define CAT(a, b) CONCAT(a, b)
|
|
|
|
float16 vc4cl_lossy(ulong16) __attribute__((overloadable));
|
|
ulong16 vc4cl_add(ulong16, ulong16) __attribute__((overloadable));
|
|
ulong16 vc4cl_sub(ulong16, ulong16) __attribute__((overloadable));
|
|
ulong16 vc4cl_mul(float16, float16) __attribute__((overloadable));
|
|
ulong16 vc4cl_mul(ulong16, ulong16) __attribute__((overloadable));
|
|
ulong16 vc4cl_extend(float16 val) __attribute__((overloadable));
|
|
|
|
result_t fma_simple(arg_t in0, arg_t in1, arg_t in2)
|
|
{
|
|
return in0 * in1 * in2;
|
|
}
|
|
|
|
__kernel void fma_simple_kernel(
|
|
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = fma_simple(in0[gid], in1[gid], in2[gid]);
|
|
}
|
|
|
|
result_t fma_extended_precision(arg_t in0, arg_t in1, arg_t in2)
|
|
{
|
|
ulong16 mul = vc4cl_mul(in0, in1);
|
|
return vc4cl_lossy(vc4cl_add(mul, vc4cl_extend(in2)));
|
|
}
|
|
|
|
__kernel void fma_extended_precision_kernel(
|
|
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = fma_extended_precision(in0[gid], in1[gid], in2[gid]);
|
|
}
|
|
|
|
__kernel void fma_builtin_kernel(
|
|
__global arg_t *out, const __global arg_t *in0, const __global arg_t *in1, const __global arg_t *in2)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = fma(in0[gid], in1[gid], in2[gid]);
|
|
}
|