# Mirror of https://github.com/nomic-ai/kompute.git
# Synced 2026-06-30 03:17:12 +00:00
# 61 lines, 1.8 KiB, Python
import kp
|
|
import numpy as np
|
|
|
|
|
|
def main():
    """Run a small matrix-multiplication compute shader through Kompute.

    Creates two square input tensors holding upper-triangular matrices of
    ones and a zero-filled output tensor, prints the inputs, compiles a
    GLSL compute shader, evaluates it in a recorded sequence, and prints
    the output tensor reshaped to the same square shape.
    """
    manager = kp.Manager()

    side = 4
    shape = [side, side]

    def upper_triangular_ones():
        # Build a fresh NumPy array for every tensor that is created.
        return manager.tensor(np.triu(np.ones(shape)))

    lhs = upper_triangular_ones()
    rhs = upper_triangular_ones()
    result = manager.tensor(np.zeros(shape))

    # Tensor data is reshaped only for display.
    lhs_view = lhs.data().reshape(shape)
    rhs_view = rhs.data().reshape(shape)
    print(f'Input tensors:\n{lhs_view}\n{rhs_view}\n')

    # Order matters: the list position of each tensor lines up with the
    # shader's `binding = 0/1/2` buffers below.
    bindings = [lhs, rhs, result]

    # Each invocation (x, y) accumulates
    #     in_tensor_1[k * n + x] * in_tensor_2[y * n + k]   for k in [0, n)
    # and stores the sum at out_tensor[y * n + x], where n is supplied via
    # specialization constant 0 as a float and cast to uint in the shader.
    # NOTE(review): this indexing treats the flat buffers as column-major
    # matrices, while the NumPy reshape above/below is row-major; the two
    # inputs here are identical, so confirm the convention before reusing
    # this shader with different operands.
    shader_source = '''
#version 450

layout (local_size_x = 1, local_size_y = 1) in;

layout (set = 0, binding = 0) readonly buffer buf_in_tensor_1 { float in_tensor_1[]; };
layout (set = 0, binding = 1) readonly buffer buf_in_tensor_2 { float in_tensor_2[]; };
layout (set = 0, binding = 2) writeonly buffer buf_out_tensor { float out_tensor[]; };

layout (constant_id = 0) const float tensor_size_f = 0;


void main()
{
uint globalRow = gl_GlobalInvocationID.x;
uint globalCol = gl_GlobalInvocationID.y;
uint tensor_size = uint(tensor_size_f);
float acc = 0.0;
for(uint k = 0u; k < tensor_size; k++)
acc += in_tensor_1[(k * tensor_size) + globalRow] * in_tensor_2[(globalCol * tensor_size) + k];
out_tensor[(globalCol * tensor_size) + globalRow] = acc;
}'''
    spirv = kp.Shader.compile_source(shader_source)

    workgroup = (side, side, 1)
    spec_consts = [float(side)]
    push_consts = []
    algorithm = manager.algorithm(
        bindings, spirv, workgroup, spec_consts, push_consts)

    # Record the three operations in order, then evaluate the sequence.
    sequence = manager.sequence()
    sequence = sequence.record(kp.OpTensorSyncDevice(bindings))
    sequence = sequence.record(kp.OpAlgoDispatch(algorithm))
    sequence = sequence.record(kp.OpTensorSyncLocal(bindings))
    sequence.eval()

    print(f'Output :\n{result.data().reshape(shape)}')
|
|
|
|
|
|
# Run the example only when this file is executed as a script.
if __name__ == '__main__':
    main()
|