mirror of
https://github.com/nomic-ai/kompute.git
synced 2026-06-29 19:07:13 +00:00
287 lines
7.9 KiB
Python
287 lines
7.9 KiB
Python
import os
|
|
|
|
import kp
|
|
import numpy as np
|
|
import logging
|
|
import pyshader as ps
|
|
|
|
from .utils import compile_source
|
|
|
|
# Absolute path of the directory that contains this test module.
DIRNAME = os.path.dirname(os.path.abspath(__file__))

# Logger registered under the name "kp".
kp_log = logging.getLogger("kp")
|
|
|
|
|
|
def test_end_to_end():
    """Dispatch one shader twice and verify both accumulated outputs.

    The algorithm is created with one spec constant (2) and a default push
    constant (2); the second dispatch passes its own push constant (3).
    """
    manager = kp.Manager()

    lhs = manager.tensor([2, 2, 2])
    rhs = manager.tensor([1, 2, 3])
    # tensor_t takes an explicitly typed numpy array; uint32 is used so the
    # tensors line up with the `uint` output buffers declared in the shader.
    acc_product = manager.tensor_t(np.array([0, 0, 0], dtype=np.uint32))
    acc_consts = manager.tensor_t(np.array([0, 0, 0], dtype=np.uint32))

    # List position doubles as the shader binding index (0..3).
    bindings = [lhs, rhs, acc_product, acc_consts]

    shader = """
        #version 450

        layout (local_size_x = 1) in;

        // The input tensors bind index is relative to index in parameter passed
        layout(set = 0, binding = 0) buffer buf_in_a { float in_a[]; };
        layout(set = 0, binding = 1) buffer buf_in_b { float in_b[]; };
        layout(set = 0, binding = 2) buffer buf_out_a { uint out_a[]; };
        layout(set = 0, binding = 3) buffer buf_out_b { uint out_b[]; };

        // Kompute supports push constants updated on dispatch
        layout(push_constant) uniform PushConstants {
            float val;
        } push_const;

        // Kompute also supports spec constants on initalization
        layout(constant_id = 0) const float const_one = 0;

        void main() {
            uint index = gl_GlobalInvocationID.x;
            out_a[index] += uint( in_a[index] * in_b[index] );
            out_b[index] += uint( const_one * push_const.val );
        }
    """

    workgroup = (3, 1, 1)
    spec_consts = [2]
    default_push = [2]
    override_push = [3]

    algo = manager.algorithm(
        bindings,
        compile_source(shader),
        workgroup,
        spec_consts,
        default_push,
    )

    # First dispatch uses the algorithm's own push constants, the second
    # one supplies override_push.
    compute_seq = manager.sequence()
    compute_seq.record(kp.OpTensorSyncDevice(bindings))
    compute_seq.record(kp.OpAlgoDispatch(algo))
    compute_seq.record(kp.OpAlgoDispatch(algo, override_push))
    compute_seq.eval()

    # The read-back goes through the eval_async / eval_await pair on a
    # separate sequence rather than a blocking eval().
    readback_seq = manager.sequence()
    readback_seq.eval_async(kp.OpTensorSyncLocal(bindings))

    readback_seq.eval_await()

    # out_a: in_a * in_b added twice        -> 2 * [2, 4, 6]
    # out_b: const_one * val added per run  -> 2*2 + 2*3
    assert acc_product.data().tolist() == [4, 8, 12]
    assert acc_consts.data().tolist() == [10, 10, 10]
|
|
|
|
|
|
def test_shader_str():
    """
    Compile a GLSL string and run an element-wise multiply of two tensors
    """

    glsl_source = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    spirv = compile_source(glsl_source)

    manager = kp.Manager()

    lhs = manager.tensor([2, 2, 2])
    rhs = manager.tensor([1, 2, 3])
    product = manager.tensor([0, 0, 0])

    # List order matches the shader bindings 0, 1, 2.
    bindings = [lhs, rhs, product]

    algo = manager.algorithm(bindings, spirv)

    seq = manager.sequence()
    seq.record(kp.OpTensorSyncDevice(bindings))
    seq.record(kp.OpAlgoDispatch(algo))
    seq.record(kp.OpTensorSyncLocal(bindings))
    seq.eval()

    assert product.data().tolist() == [2.0, 4.0, 6.0]
|
|
|
|
|
|
def test_sequence():
    """
    Test an explicitly held sequence and manual destruction of resources

    Records sync-to-device, dispatch and sync-to-local on one sequence,
    evaluates it, then checks that destroy() flips is_init() to false on
    the sequence and on every tensor, and that the computed output is
    still readable after the sequence has been destroyed.
    """

    shader = """
        #version 450
        layout(set = 0, binding = 0) buffer tensorLhs {float valuesLhs[];};
        layout(set = 0, binding = 1) buffer tensorRhs {float valuesRhs[];};
        layout(set = 0, binding = 2) buffer tensorOutput { float valuesOutput[];};
        layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;

        void main()
        {
            uint index = gl_GlobalInvocationID.x;
            valuesOutput[index] = valuesLhs[index] * valuesRhs[index];
        }
    """

    spirv = compile_source(shader)

    # Manager is built with an explicit argument of 0 here (the other tests
    # in this file mostly use the no-argument form).
    mgr = kp.Manager(0)

    tensor_in_a = mgr.tensor([2, 2, 2])
    tensor_in_b = mgr.tensor([1, 2, 3])
    tensor_out = mgr.tensor([0, 0, 0])

    # List order matches the shader bindings 0, 1, 2.
    params = [tensor_in_a, tensor_in_b, tensor_out]

    algo = mgr.algorithm(params, spirv)

    sq = mgr.sequence()

    sq.record(kp.OpTensorSyncDevice(params))
    sq.record(kp.OpAlgoDispatch(algo))
    sq.record(kp.OpTensorSyncLocal(params))

    sq.eval()

    # Plain truthiness checks instead of `== True` / `== False` (PEP 8).
    assert sq.is_init()

    sq.destroy()

    assert not sq.is_init()

    # Output data is checked after the sequence was destroyed, both as a
    # Python list and element-wise through numpy.
    assert tensor_out.data().tolist() == [2.0, 4.0, 6.0]
    assert np.all(tensor_out.data() == [2.0, 4.0, 6.0])

    tensor_in_a.destroy()
    tensor_in_b.destroy()
    tensor_out.destroy()

    assert not tensor_in_a.is_init()
    assert not tensor_in_b.is_init()
    assert not tensor_out.is_init()
|
|
|
|
|
|
def test_pushconsts():
    """Accumulate float push constants over three dispatches of one algorithm."""

    glsl_source = """
          #version 450
          layout(push_constant) uniform PushConstants {
            float x;
            float y;
            float z;
          } pcs;
          layout (local_size_x = 1) in;
          layout(set = 0, binding = 0) buffer a { float pa[]; };
          void main() {
              pa[0] += pcs.x;
              pa[1] += pcs.y;
              pa[2] += pcs.z;
          }
    """
    spirv = compile_source(glsl_source)

    manager = kp.Manager()

    accumulator = manager.tensor([0, 0, 0])

    # No spec constants; [0.1, 0.2, 0.3] are the algorithm's own push
    # constants, used by the dispatch that does not pass any.
    algo = manager.algorithm(
        [accumulator], spirv, (1, 1, 1), [], [0.1, 0.2, 0.3]
    )

    seq = manager.sequence()
    seq.record(kp.OpTensorSyncDevice([accumulator]))
    seq.record(kp.OpAlgoDispatch(algo))
    seq.record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
    seq.record(kp.OpAlgoDispatch(algo, [0.3, 0.2, 0.1]))
    seq.record(kp.OpTensorSyncLocal([accumulator]))
    seq.eval()

    # 0.1 + 0.3 + 0.3, 0.2 + 0.2 + 0.2, 0.3 + 0.1 + 0.1
    expected = np.array([0.7, 0.6, 0.5], dtype=np.float32)
    assert np.allclose(accumulator.data(), expected)
|
|
|
|
|
|
def test_pushconsts_int():
    """Accumulate int32 push constants over three dispatches of one algorithm."""

    glsl_source = """
          #version 450
          layout(push_constant) uniform PushConstants {
            int x;
            int y;
            int z;
          } pcs;
          layout (local_size_x = 1) in;
          layout(set = 0, binding = 0) buffer a { int pa[]; };
          void main() {
              pa[0] += pcs.x;
              pa[1] += pcs.y;
              pa[2] += pcs.z;
          }
    """
    spirv = compile_source(glsl_source)

    manager = kp.Manager()

    accumulator = manager.tensor_t(np.array([0, 0, 0], dtype=np.int32))

    # Both constant sets are passed as int32 numpy arrays; the spec
    # constant array is empty.
    no_spec_consts = np.array([], dtype=np.int32)
    default_push = np.array([-1, -1, -1], dtype=np.int32)

    algo = manager.algorithm(
        [accumulator], spirv, (1, 1, 1), no_spec_consts, default_push
    )

    seq = manager.sequence()
    seq.record(kp.OpTensorSyncDevice([accumulator]))
    seq.record(kp.OpAlgoDispatch(algo))
    seq.record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32)))
    seq.record(kp.OpAlgoDispatch(algo, np.array([-1, -1, -1], dtype=np.int32)))
    seq.record(kp.OpTensorSyncLocal([accumulator]))
    seq.eval()

    # Three dispatches, each adding -1 to every element.
    expected = np.array([-3, -3, -3], dtype=np.int32)
    assert np.all(accumulator.data() == expected)
|
|
|
|
|
|
def test_workgroup():
    """Dispatch a pyshader kernel on a (16, 8, 1) workgroup and check the ids written."""
    manager = kp.Manager(0)

    x_ids = manager.tensor(np.zeros([16,8]))
    y_ids = manager.tensor(np.zeros([16,8]))

    # The decorated function is handed to pyshader for translation, so its
    # body is kept exactly as written.
    @ps.python2shader
    def compute_shader_wg(gl_idx=("input", "GlobalInvocationId", ps.ivec3),
                          gl_wg_id=("input", "WorkgroupId", ps.ivec3),
                          gl_wg_num=("input", "NumWorkgroups", ps.ivec3),
                          data1=("buffer", 0, ps.Array(ps.f32)),
                          data2=("buffer", 1, ps.Array(ps.f32))):
        i = gl_wg_id.x * gl_wg_num.y + gl_wg_id.y
        data1[i] = f32(gl_idx.x)
        data2[i] = f32(gl_idx.y)

    spirv = compute_shader_wg.to_spirv()
    algo = manager.algorithm([x_ids, y_ids], spirv, (16,8,1))

    seq = manager.sequence()
    seq.record(kp.OpTensorSyncDevice([x_ids, y_ids]))
    seq.record(kp.OpAlgoDispatch(algo))
    seq.record(kp.OpTensorSyncLocal([x_ids, y_ids]))
    seq.eval()

    print(x_ids.data())
    print(y_ids.data())

    # Flattened 16x8 grids: x id repeated along each row, y id cycling
    # within each row.
    expected_x = np.stack([np.arange(16)]*8, axis=1).ravel()
    expected_y = np.stack([np.arange(8)]*16, axis=0).ravel()

    assert np.all(x_ids.data() == expected_x)
    assert np.all(y_ids.data() == expected_y)
|
|
|
|
|
|
def test_mgr_utils():
    """Check that the manager's device queries expose a "device_name" key."""
    manager = kp.Manager()

    device_props = manager.get_device_properties()

    assert "device_name" in device_props

    available = manager.list_devices()

    # At least one device must be listed, and its entry carries the same key.
    assert len(available) > 0
    assert "device_name" in available[0]
|