import os from pyshader import python2shader, f32, ivec3, Array from pyshader.stdlib import exp, log from kp import Tensor, Manager, Sequence DIRNAME = os.path.dirname(os.path.abspath(__file__)) def test_opmult(): """ Test basic OpMult operation """ tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) mgr = Manager() mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_mult_def([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] def test_opalgobase_data(): """ Test basic OpAlgoBase operation """ tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) mgr = Manager() shaderData = """ #version 450 layout (local_size_x = 1) in; // The input tensors bind index is relative to index in parameter passed layout(set = 0, binding = 0) buffer bina { float tina[]; }; layout(set = 0, binding = 1) buffer binb { float tinb[]; }; layout(set = 0, binding = 2) buffer bout { float tout[]; }; void main() { uint index = gl_GlobalInvocationID.x; tout[index] = tina[index] * tinb[index]; } """ mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_str_def([tensor_in_a, tensor_in_b, tensor_out], list(shaderData)) mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] def test_opalgobase_file(): """ Test basic OpAlgoBase operation """ tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) mgr = Manager() shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] def test_sequence(): """ Test basic OpAlgoBase operation """ mgr = Manager(0, [2]) tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) shaderFilePath = os.path.join(DIRNAME, "../../shaders/glsl/opmult.comp") mgr.eval_async_algo_file_def([tensor_in_a, tensor_in_b, tensor_out], shaderFilePath) mgr.eval_await_def() seq = mgr.create_sequence("op") seq.begin() seq.record_tensor_sync_local([tensor_in_a]) seq.record_tensor_sync_local([tensor_in_b]) seq.record_tensor_sync_local([tensor_out]) seq.end() seq.eval() assert tensor_out.data() == [2.0, 4.0, 6.0] def test_pyshader_pyshader(): @python2shader def compute_shader_multiply(index=("input", "GlobalInvocationId", ivec3), data1=("buffer", 0, Array(f32)), data2=("buffer", 1, Array(f32)), data3=("buffer", 2, Array(f32))): i = index.x data3[i] = data1[i] * data2[i] tensor_in_a = Tensor([2, 2, 2]) tensor_in_b = Tensor([1, 2, 3]) tensor_out = Tensor([0, 0, 0]) mgr = Manager() mgr.eval_tensor_create_def([tensor_in_a, tensor_in_b, tensor_out]) mgr.eval_algo_data_def([tensor_in_a, tensor_in_b, tensor_out], compute_shader_multiply.to_spirv()) mgr.eval_tensor_sync_local_def([tensor_out]) assert tensor_out.data() == [2.0, 4.0, 6.0] def test_logistic_regression_pyshader(): @python2shader def compute_shader( index = ("input", "GlobalInvocationId", ivec3), x_i = ("buffer", 0, Array(f32)), x_j = ("buffer", 1, Array(f32)), y = ("buffer", 2, Array(f32)), w_in = ("buffer", 3, Array(f32)), w_out_i = ("buffer", 4, Array(f32)), w_out_j = ("buffer", 5, Array(f32)), b_in = ("buffer", 6, Array(f32)), b_out = ("buffer", 7, Array(f32)), l_out = ("buffer", 8, Array(f32)), M = ("buffer", 9, Array(f32))): i = index.x m = M[0] w_curr = vec2(w_in[0], w_in[1]) b_curr = b_in[0] x_curr = vec2(x_i[i], x_j[i]) y_curr = y[i] z_dot = w_curr @ x_curr z = z_dot + b_curr y_hat = 1.0 / (1.0 + exp(-z)) d_z = y_hat - y_curr d_w = (1.0 / m) * x_curr * d_z d_b = (1.0 / m) * d_z loss = -((y_curr * log(y_hat)) + ((1.0 + y_curr) * log(1.0 - y_hat))) w_out_i[i] = d_w.x w_out_j[i] = d_w.y b_out[i] = d_b l_out[i] = loss # First we create input and ouput tensors for shader tensor_x_i = Tensor([0.0, 1.0, 1.0, 1.0, 1.0]) tensor_x_j = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) tensor_y = Tensor([0.0, 0.0, 0.0, 1.0, 1.0]) tensor_w_in = Tensor([0.001, 0.001]) tensor_w_out_i = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) tensor_w_out_j = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) tensor_b_in = Tensor([0.0]) tensor_b_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) tensor_l_out = Tensor([0.0, 0.0, 0.0, 0.0, 0.0]) tensor_m = Tensor([ 5.0 ]) # We store them in an array for easier interaction params = [tensor_x_i, tensor_x_j, tensor_y, tensor_w_in, tensor_w_out_i, tensor_w_out_j, tensor_b_in, tensor_b_out, tensor_l_out, tensor_m] mgr = Manager() mgr.eval_tensor_create_def(params) # Record commands for efficient evaluation sq = mgr.create_sequence() sq.begin() sq.record_tensor_sync_device([tensor_w_in, tensor_b_in]) sq.record_algo_data(params, compute_shader.to_spirv()) sq.record_tensor_sync_local([tensor_w_out_i, tensor_w_out_j, tensor_b_out, tensor_l_out]) sq.end() ITERATIONS = 100 learning_rate = 0.1 # Perform machine learning training and inference across all input X and Y for i_iter in range(ITERATIONS): sq.eval() # Calculate the parameters based on the respective derivatives calculated for j_iter in range(tensor_b_out.size()): tensor_w_in.set(0, tensor_w_in.get(0) - learning_rate * tensor_w_out_i.data()[j_iter]) tensor_w_in.set(1, tensor_w_in.get(1) - learning_rate * tensor_w_out_j.data()[j_iter]) tensor_b_in.set(0, tensor_b_in.get(0) - learning_rate * tensor_b_out.data()[j_iter]) assert tensor_w_in.data()[0] < 0.01 assert tensor_w_in.data()[0] > 0.0 assert tensor_w_in.data()[1] > 1.5 assert tensor_b_in.data()[0] < 0.7