mirror of
https://github.com/Qortal/Brooklyn.git
synced 2025-02-01 07:42:18 +00:00
92 lines
2.5 KiB
Common Lisp
92 lines
2.5 KiB
Common Lisp
#define arg_t float16
|
|
#define result_t float16
|
|
#define int_t int16
|
|
#define uint_t uint16
|
|
|
|
#define CONCAT(a, b) a##b
|
|
#define CAT(a, b) CONCAT(a, b)
|
|
|
|
result_t approx_rootn(arg_t x, int_t n)
|
|
{
|
|
// Divides the exponent by n and emplaces it back into the number
|
|
// Adapted from: https://web.archive.org/web/20131227144655/http://metamerist.com/cbrt/cbrt.htm
|
|
int_t i = CAT(as_, int_t)(x);
|
|
int_t exp = (i - (int_t) (127 << 23)) / n + (int_t) (127 << 23);
|
|
return CAT(as_, result_t)((i & (int_t) 0x807FFFFF) | (exp));
|
|
}
|
|
|
|
result_t approx_cbrt(arg_t f)
|
|
{
|
|
// Adapted from: https://web.archive.org/web/20120111112244/http://metamerist.com/cbrt/CubeRoot.cpp
|
|
uint_t p = CAT(as_, uint_t)(f);
|
|
p = p / 3 + 709921077;
|
|
return CAT(as_, result_t)(p);
|
|
}
|
|
|
|
result_t cbrt_halley_step(arg_t x, arg_t base)
|
|
{
|
|
// Adapted from: https://web.archive.org/web/20120111112244/http://metamerist.com/cbrt/CubeRoot.cpp
|
|
result_t x3 = x * x * x;
|
|
return x * (x3 + base + base) / (x3 + x3 + base);
|
|
}
|
|
|
|
result_t cbrt_halley(arg_t val)
|
|
{
|
|
// Adapted from: https://web.archive.org/web/20120111112244/http://metamerist.com/cbrt/CubeRoot.cpp
|
|
arg_t abs = fabs(val);
|
|
arg_t approx = approx_rootn(abs, 3);
|
|
|
|
result_t result = approx;
|
|
#pragma loop unroll
|
|
for(int i = 0; i < 4; ++i) // TODO can be adapted for accuracy
|
|
{
|
|
result = cbrt_halley_step(result, val);
|
|
}
|
|
return copysign(result, val);
|
|
}
|
|
|
|
__kernel void cbrt_halley_kernel(__global arg_t *out, const __global arg_t *in)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = cbrt_halley(in[gid]);
|
|
}
|
|
|
|
result_t cbrt_newton_step(arg_t x, arg_t base)
|
|
{
|
|
// Adapted from: https://web.archive.org/web/20120111112244/http://metamerist.com/cbrt/CubeRoot.cpp
|
|
return x - (1.0f / 3.0f) * (x - base / (x * x));
|
|
}
|
|
|
|
result_t cbrt_newton(arg_t val)
|
|
{
|
|
// Adapted from: https://web.archive.org/web/20120111112244/http://metamerist.com/cbrt/CubeRoot.cpp
|
|
arg_t abs = fabs(val);
|
|
arg_t approx = approx_cbrt(abs);
|
|
|
|
result_t result = approx;
|
|
#pragma loop unroll
|
|
for(int i = 0; i < 4; ++i) // TODO can be adapted for accuracy
|
|
{
|
|
result = cbrt_newton_step(result, val);
|
|
}
|
|
return copysign(result, val);
|
|
}
|
|
|
|
__kernel void cbrt_newton_kernel(__global arg_t *out, const __global arg_t *in)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = cbrt_newton(in[gid]);
|
|
}
|
|
|
|
__kernel void cbrt_builtin_kernel(__global arg_t *out, const __global arg_t *in)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = cbrt(in[gid]);
|
|
}
|
|
|
|
__kernel void cbrt_pow_kernel(__global arg_t *out, const __global arg_t *in)
|
|
{
|
|
uint gid = get_global_id(0);
|
|
out[gid] = pow(in[gid], 1.0f / 3.0f);
|
|
}
|