mirror of
https://github.com/nomic-ai/kompute.git
synced 2026-06-30 03:17:12 +00:00
57 lines
2.2 KiB
Python
57 lines
2.2 KiB
Python
import time
|
|
|
|
import kp
|
|
import numpy as np
|
|
from imp1_naive import MatMulOp as MatMulOp1
|
|
from imp2_tiled import MatMulOp as MatMulOp2
|
|
from imp3_better_tiling import MatMulOp as MatMulOp3
|
|
|
|
|
|
def main():
|
|
mgr = kp.Manager()
|
|
for tensor_size, experiment_count in [(512, 1000), (4096, 5)]:
|
|
tensor_shape = [tensor_size, tensor_size]
|
|
tensor_shape = [tensor_size, tensor_size]
|
|
mat_1 = np.triu(np.ones(tensor_shape))
|
|
mat_2 = np.triu(np.ones(tensor_shape))
|
|
|
|
tensor_in_1 = mgr.tensor(mat_1)
|
|
tensor_in_2 = mgr.tensor(mat_2)
|
|
tensor_out = mgr.tensor(np.zeros(tensor_shape))
|
|
if tensor_size <= 512:
|
|
mat_result = mat_1 @ mat_2
|
|
else:
|
|
MatMulOp1(mgr)(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)
|
|
mat_result = tensor_out.data().reshape(tensor_shape) # CPU is too slow for big sizes
|
|
|
|
print(f'{tensor_shape} input tensors:\n'
|
|
f'{mat_1}\n'
|
|
f'{mat_2}\n')
|
|
print(f'Output :\n{mat_result}')
|
|
|
|
for MatMulOp in [MatMulOp1, MatMulOp2, MatMulOp3]:
|
|
tensor_out.data()[:] = 0
|
|
mgr.sequence().record(kp.OpTensorSyncDevice([tensor_out]))
|
|
matmul_op = MatMulOp(mgr)
|
|
matmul_op(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)
|
|
|
|
start_time = time.time()
|
|
for _ in range(experiment_count):
|
|
matmul_op(tensor_shape, tensor_in_1, tensor_in_2, tensor_out)
|
|
end_time = time.time()
|
|
experiment_time = end_time - start_time
|
|
op_count = tensor_shape[0] * tensor_shape[1] * ((tensor_shape[1] * 2) - 1)
|
|
|
|
# print(tensor_out.data().reshape(tensor_shape))
|
|
if (tensor_out.data().reshape(tensor_shape) == mat_result).all():
|
|
print(f'From {MatMulOp.__module__} : {experiment_count} matmul time : '
|
|
f'{experiment_time * 1000:0.2f}ms => '
|
|
f'{experiment_count / experiment_time:0.2f}op/s or '
|
|
f'{experiment_count * op_count / (1e9 * experiment_time):0.2f} GFLOPS')
|
|
else:
|
|
print(f'Test failed => output tensor is wrong :\n{tensor_out.data().reshape(tensor_shape)}')
|
|
|
|
|
|
if __name__ == '__main__':
|
|
main()
|