-- Lua module: builds and returns a table of OpenCL C identifiers tagged as keywords.
-- Reference text for the OpenCL C built-in functions: one entry per line,
-- written as `name(parameter list) optional description`.
-- NOTE(review): none of the code visible in this file reads `funcstring`;
-- it looks like reference data kept for later use - confirm before removing.
local funcstring =
[[
get_work_dim() Returns the number of dimensions in use
get_global_size(uint dimindx) Returns the number of global work-items specified for dimension identified by dimindx
get_global_id(uint dimindx) Returns the unique global work-item ID value for dimension identified by dimindx
get_local_size(uint dimindx) Returns the number of local work-items specified in dimension identified by dimindx
get_local_id(uint dimindx) Returns the unique local work-item ID i.e. a work-item within a specific work-group for dimension identified by dimindx.
get_num_groups(uint dimindx) Returns the number of work-groups that will execute a kernel for dimension identified by dimindx
get_group_id(uint dimindx) Returns the work-group ID
acos(gentype) Arc cosine function
acosh(gentype) Inverse hyperbolic cosine
acospi(gentype) Compute acos (x) / PI
asin(gentype) Arc sine function
asinh(gentype) Inverse hyperbolic sine
asinpi(gentype x) Compute asin (x) / PI
atan(gentype y_over_x) Arc tangent function
atan2(gentype y, gentype x) Arc tangent of y / x
atanh(gentype) Hyperbolic arc tangent.
atanpi(gentype x) Compute atan (x) / PI
atan2pi(gentype y, gentype x) Compute atan2 (y, x) / PI
cbrt(gentype) Compute cube-root
ceil(gentype) Round to integral value using the round to +ve infinity rounding mode
copysign(gentype x, gentype y) Returns x with its sign changed to match the sign of y
cos(gentype) Compute cosine
cosh(gentype) Compute hyperbolic cosine
cospi(gentype x) Compute cos (PI*x)
erfc(gentype) Complementary error function
erf(gentype) Error function encountered in integrating the normal distribution
exp(gentype x) Compute the base-e exponential of x
exp2(gentype) Exponential base 2 function
exp10(gentype) Exponential base 10 function
expm1(gentype x) Compute e^x - 1.0
fabs(gentype) Compute absolute value of a floating-point number
fdim(gentype x, gentype y) x - y if x > y, +0 if x is less than or equal to y
floor(gentype) Round to integral value using the round to -ve infinity rounding mode
fma(gentype a, gentype b, gentype c) Returns the correctly rounded floating-point representation of the sum of c with the infinitely precise product of a and b
fmax(gentype x, gentype y) Returns y if x < y, otherwise it returns x
fmin(gentype x, gentype y) Returns y if y < x, otherwise it returns x
fmod(gentype x, gentype y) Modulus. Returns x - y * trunc (x/y)
fract(gentype x, gentype *iptr) Returns fmin( x - floor (x), 0x1.fffffep-1f ).
frexp(gentype x, intn *exp) Extract mantissa and exponent from x
hypot(gentype x, gentype y) Compute the value of the square root of x^2 + y^2
ilogb(gentype x) Return the exponent as an integer value
ldexp(gentype x, intn n) Multiply x by 2 to the power n
lgamma(gentype x) Returns the natural logarithm of the absolute value of the gamma function
lgamma_r(gentype x, intn *signp) Returns the natural logarithm of the absolute value of the gamma function
log(gentype) Compute natural logarithm
log2(gentype) Compute a base 2 logarithm
log10(gentype) Compute a base 10 logarithm
log1p(gentype x) Compute log_e(1.0 + x)
logb(gentype x) Compute the exponent of x, which is the integral part of log_r|x|
mad(gentype a, gentype b, gentype c) Approximates a * b + c.
modf(gentype x, gentype *iptr) Decompose a floating-point number
nan(uintn nancode) Returns a quiet NaN
nextafter(gentype x, gentype y) Computes the next representable single-precision floating-point value following x in the direction of y.
pow(gentype x, gentype y) Compute x to the power y
pown(gentype x, intn y) Compute x to the power y, where y is an integer
powr(gentype x, gentype y) Compute x to the power y, where x is >= 0
remainder(gentype x, gentype y) r = x - n*y, where n is the integer nearest the exact value of x/y
remquo(gentype x, gentype y, intn *quo) r = x - n*y, where n is the integer nearest the exact value of x/y
rint(gentype) Round to integral value (using round to nearest even rounding mode)
rootn(gentype x, intn y) Compute x to the power 1/y
round(gentype x) Return the integral value nearest to x rounding halfway cases away from zero
rsqrt(gentype) Compute inverse square root
sin(gentype) Compute sine
sincos(gentype x, gentype *cosval) Compute sine and cosine of x
sinh(gentype) Compute hyperbolic sine.
sinpi(gentype x) Compute sin (PI*x)
sqrt(gentype) Compute square root
tan(gentype) Compute tangent
tanh(gentype) Compute hyperbolic tangent
tanpi(gentype x) Compute tan (PI*x)
tgamma(gentype) Compute the gamma function
trunc(gentype) Round to integral value using the round to zero rounding mode
abs(gentype x) Returns |x|
abs_diff(gentype x, gentype y) Returns |x - y| without modulo overflow
add_sat(gentype x, gentype y) Returns x + y and saturates the result
hadd(gentype x, gentype y) Returns (x + y) >> 1
rhadd(gentype x, gentype y) Returns (x + y + 1) >> 1
clz(gentype x) Returns the number of leading 0-bits in x, starting at the most significant bit position.
mad_hi(gentype a, gentype b, gentype c) Returns mul_hi(a, b) + c
mad_sat(gentype a, gentype b, gentype c) Returns a * b + c and saturates the result
max(gentype x, gentype y) Returns y if x < y, otherwise it returns x
min(gentype x, gentype y) Returns y if y < x, otherwise it returns x
mul_hi(gentype x, gentype y) Computes x * y and returns the high half of the product of x and y
rotate(gentype v, gentype i)
sub_sat(gentype x, gentype y) Returns x - y and saturates the result
upsample(charn hi, ucharn lo) result[i] = ((short)hi[i] << 8) | lo[i]
mad24(gentype x, gentype y, gentype z)
mul24(gentype x, gentype y)
clamp(gentype x, gentype minval, gentype maxval) Returns fmin(fmax(x, minval), maxval)
degrees(gentype radians) Converts radians to degrees
max(gentype x, gentype y)
min(gentype x, gentype y)
mix(gentype x, gentype y, gentype a) Returns the linear blend of x&y: x + (y - x) * a
radians(gentype degrees) Converts degrees to radians
step(gentype edge, gentype x) Returns 0.0 if x < edge, otherwise it returns 1.0
smoothstep(gentype edge0, gentype edge1, gentype x)
sign(gentype x)
cross(float4 p0, float4 p1) Returns the cross product of p0.xyz and p1.xyz.
dot(gentype p0, gentype p1) Compute dot product
distance(gentype p0, gentype p1) Returns the distance between p0 and p1
length(gentype p) Return the length of vector
normalize(gentype p) Returns a vector in the same direction as p but with length of 1.
fast_distance(gentype p0, gentype p1) Returns fast_length(p0 - p1).
fast_length(gentype p) Returns the length of vector
fast_normalize(gentype p) Returns a vector in the same direction as p but with length of 1.
read_imagef(image2d_t image, sampler_t sampler, int2 coord)
read_imagei(image2d_t image, sampler_t sampler, int2 coord)
read_imageui(image2d_t image, sampler_t sampler, int2 coord)
write_imagef(image2d_t image, int2 coord, float4 color)
write_imagei(image2d_t image, int2 coord, int4 color)
write_imageui(image2d_t image, int2 coord, uint4 color)
get_image_width(image2d_t image)
get_image_width(image3d_t image)
get_image_height(image2d_t image)
get_image_height(image3d_t image)
get_image_channel_data_type(image2d_t image)
get_image_channel_data_type(image3d_t image)
get_image_channel_order(image2d_t image)
get_image_channel_order(image3d_t image)
get_image_dim(image2d_t image)
get_image_dim(image3d_t image)
barrier(cl_mem_fence_flags flags) All work-items in a work-group executing the kernel must execute this function before any are allowed to continue execution beyond the barrier.
mem_fence(cl_mem_fence_flags flags) Orders loads and stores of a work-item executing a kernel.
read_mem_fence(cl_mem_fence_flags flags) Read memory barrier that orders only loads.
write_mem_fence(cl_mem_fence_flags flags) Write memory barrier that orders only stores.
async_work_group_copy(gentype *dst, const gentype *src, size_t num_elements, event_t event) Perform an async copy of num_elements gentype elements from src to dst.
wait_group_events(int num_events, event_t *event_list) Wait for events that identify the async_work_group_copy operations to complete.
prefetch(const __global gentype *p, size_t num_elements) Prefetch num_elements * sizeof(gentype) bytes into the global cache.
vload2(size_t offset, const type *p) Read vector data from memory
vload4(size_t offset, const type *p) Read vector data from memory
vload8(size_t offset, const type *p) Read vector data from memory
vload16(size_t offset, const type *p) Read vector data from memory
vstore2(type2 data, size_t offset, type *p) Write vector data to memory
vstore4(type4 data, size_t offset, type *p) Write vector data to memory
vstore8(type8 data, size_t offset, type *p) Write vector data to memory
vstore16(type16 data, size_t offset, type *p) Write vector data to memory
]]
--- Build a "function" API entry from a one-line description.
-- The description may end with a signature suffix of the form
-- `- (return type)(argument list)`. When that suffix is present it is split
-- off into the `returns` and `args` fields; otherwise the whole text is kept
-- as the description and `returns` is set to the placeholder "(?)".
-- @param description string: free text, optionally ending in `- (ret)(args)`
-- @return table: {type="function", description=..., returns=..., args=...};
--   `args` is absent when no signature suffix was found
local function fn (description)
  local text, returns, args = description:match("(.+)%-%s*(%b())%s*(%b())")
  if not text then
    return { type = "function", description = description, returns = "(?)" }
  end
  return {
    type = "function",
    -- Drop the whitespace that separated the text from the "-" marker.
    -- The parentheses keep only gsub's first result.
    description = (text:gsub("%s+$", "")),
    -- A %b() capture always starts with "(" and ends with ")", so it never
    -- carries surrounding whitespace and needs no trimming.
    returns = returns,
    args = args,
  }
end
--- Build a "value" API entry.
-- @param description string: text stored unchanged in the entry
-- @return table: {type="value", description=description}
local function val (description)
  return { type = "value", description = description }
end
-- docs
-- Table keyed by identifier name. It starts out empty; entries are added by
-- the code that follows.
local api = {}
-- Scalar type names for which `convert_*` identifiers are generated.
local convtypes = [[bool char uchar short ushort int uint long ulong float double]]

-- Space-separated list of generated names:
--   convert_<type>[<width>][_sat][<rounding suffix>]
-- covering the scalar form plus vector widths 2, 4, 8 and 16, each with and
-- without `_sat`, for every rounding suffix.
local convout
do
  -- Loop-invariant tables are built once rather than on every iteration.
  local rounding = {"", "_rte", "_rtz", "_rtp", "_rtn"}
  local widths = {2, 4, 8, 16}
  local names = {}
  for base in convtypes:gmatch("([%w_]+)") do
    for _, mode in ipairs(rounding) do
      names[#names + 1] = "convert_" .. base .. mode
      names[#names + 1] = "convert_" .. base .. "_sat" .. mode
      for _, width in ipairs(widths) do
        names[#names + 1] = "convert_" .. base .. width .. mode
        names[#names + 1] = "convert_" .. base .. width .. "_sat" .. mode
      end
    end
  end
  convout = table.concat(names, " ")
end
-- Type names for which an `as_<type>` identifier is generated.
local astypes = [[int uint uchar ushort float double size_t ptrdiff_t intptr_t uintptr_t
long ulong char short unsigned
float2 float4 float8 float16
double2 double4 double8 double16
char2 char4 char8 char16
uchar2 uchar4 uchar8 uchar16
short2 short4 short8 short16
ushort2 ushort4 ushort8 ushort16
int2 int4 int8 int16
uint2 uint4 uint8 uint16
long2 long4 long8 long16
ulong2 ulong4 ulong8 ulong16]]

-- Space-separated list "as_int as_uint ...", one name per entry of `astypes`,
-- in the order the entries appear there.
local astypeout
do
  local names = {}
  for typename in astypes:gmatch("([%w_]+)") do
    names[#names + 1] = "as_" .. typename
  end
  astypeout = table.concat(names, " ")
end
-- Whitespace-separated list of every identifier to be registered as a keyword:
-- the generated `as_*` and `convert_*` names followed by the hand-maintained
-- lists of types, language keywords, constants, predefined macros, qualifiers,
-- attributes and built-in function names.
local keyw = astypeout.." "..convout.." "..[[
int uint uchar ushort half float bool double size_t ptrdiff_t intptr_t uintptr_t void
long ulong char short unsigned
half2 half4 half8 half16
float2 float4 float8 float16
double2 double4 double8 double16
char2 char4 char8 char16
uchar2 uchar4 uchar8 uchar16
short2 short4 short8 short16
ushort2 ushort4 ushort8 ushort16
int2 int4 int8 int16
uint2 uint4 uint8 uint16
long2 long4 long8 long16
ulong2 ulong4 ulong8 ulong16
image2d_t image3d_t sampler_t event_t cl_image_format

struct typedef enum union const sizeof
return switch case default for do while if else break continue volatile
CLK_A CLK_R CLK_RG CLK_RGB CLK_RGBA CLK_ARGB CLK_BGRA CLK_INTENSITY CLK_LUMINANCE

MAXFLOAT HUGE_VALF INFINITY NAN
CLK_LOCAL_MEM_FENCE CLK_GLOBAL_MEM_FENCE
CLK_SNORM_INT8
CLK_SNORM_INT16
CLK_UNORM_INT8
CLK_UNORM_INT16
CLK_UNORM_SHORT_565
CLK_UNORM_SHORT_555
CLK_UNORM_SHORT_101010
CLK_SIGNED_INT8
CLK_SIGNED_INT16
CLK_SIGNED_INT32
CLK_UNSIGNED_INT8
CLK_UNSIGNED_INT16
CLK_UNSIGNED_INT32
CLK_HALF_FLOAT
CLK_FLOAT
__FILE__ __LINE__ __OPENCL_VERSION__ __ENDIAN_LITTLE__
__ROUNDING_MODE__ __IMAGE_SUPPORT__ __FAST_RELAXED_MATH__

__kernel kernel __attribute__ __read_only __write_only read_only write_only
__constant constant __local local __global global __private private
vec_type_hint work_group_size_hint reqd_work_group_size
aligned packed endian host device

async_work_group_copy wait_group_events prefetch
clamp min max degrees radians sign smoothstep step mix
mem_fence read_mem_fence write_mem_fence
cross prod distance dot length normalize fast_distance fast_length fast_normalize
read_image write_image get_image_width get_image_height get_image_depth
get_image_channel_data_type get_image_channel_order
get_image_dim
abs abs_diff add_sat clz hadd mad24 mad_hi mad_sat
mul24 mul_hi rhadd rotate sub_sat upsample
read_imagei write_imagei read_imageui write_imageui
read_imagef write_imagef

isequal isnotequal isgreater isgreaterequal isless islessequal islessgreater
isfinite isinf isnan isnormal isordered isunordered signbit any all bitselect select

acos acosh acospi asin asinh asinpi atan atan2 atanh atanpi atan2pi
cbrt ceil copysign cos half_cos native_cos cosh cospi half_divide native_divide
erf erfc exp half_exp native_exp exp2 half_exp2 native_exp2 exp10 half_exp10 native_exp10
expm1 fabs fdim floor fma fmax fmin fmod fract frexp hypot ilogb
ldexp lgamma lgamma_r log half_log native_log log2 half_log2 native_log2
log10 half_log10 native_log10 log1p logb mad modf nan nextafter
pow pown powr half_powr native_powr half_recip native_recip
remainder remquo rint round rootn rsqrt half_rsqrt native_rsqrt
sin half_sin native_sin sincos sinh sinpi sqrt half_sqrt native_sqrt
tan half_tan native_tan tanh tanpi tgamma trunc

barrier
vload2 vload4 vload8 vload16
vload_half vload_half2 vload_half4 vload_half8 vload_half16 vloada_half4 vloada_half8 vloada_half16
vstore2 vstore4 vstore8 vstore16
vstore_half vstore_half2 vstore_half4 vstore_half8 vstore_half16 vstorea_half4 vstorea_half8 vstorea_half16
get_global_id get_global_size get_group_id get_local_id get_local_size get_num_groups get_work_dim
]]
-- keywords - shouldn't be left out
-- Every run of letters, digits and underscores in `keyw` becomes an entry
-- api[name] = {type="keyword"}; an existing entry with the same name is
-- overwritten.
for w in keyw:gmatch("([a-zA-Z_0-9]+)") do
  api[w] = {type="keyword"}
end

-- The populated table is the module's result.
return api