Files
blis/bench/inputgemm.txt
Dipal M Zambare 8f310c3384 AOCL DTL - Added thread and execution time details in logs
-- Added number of threads used in DTL logs
    -- Added support for timestamps in DTL traces
    -- Added time taken by API at BLAS layer in the DTL logs
    -- Added GFLOPS achieved in DTL logs
    -- Added support to enable/disable execution time and
       GFLOPS printing for individual APIs. We may not want
       it for all APIs. It will also help us migrate APIs
       to execution time and GFLOPS logs in stages.
    -- Updated GEMM bench to match new logs
    -- Refactored aocldtl_blis.c to remove code duplication.
    -- Cleaned up log generation and reading to use spaces
       consistently to separate the various fields.
    -- Updated AOCL_gettid() to return correct thread id
       when using pthreads.

AMD-Internal: [CPUPL-1691]
Change-Id: Iddb8a3be2a5cd624a07ccdbf5ae0695799d8ae8e
2021-11-12 08:58:54 +05:30

33 lines
3.4 KiB
Plaintext

dgemm_ D N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 1542.854 ms 7.778 GFLOPS
dgemm_ D N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.307 ms 6.515 GFLOPS
dgemm_ D N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 32.442 ms 7.706 GFLOPS
dgemm_ D N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 172.170 ms 8.468 GFLOPS
dgemm_ D N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 655.381 ms 6.704 GFLOPS
dgemm_ D N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 1302.928 ms 7.541 GFLOPS
dgemm_ D T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 3278.541 ms 5.649 GFLOPS
dgemm_ D T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 5292.842 ms 5.904 GFLOPS
zgemm_ Z N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 300.940 ms 159.500 GFLOPS
zgemm_ Z N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.748 ms 10.695 GFLOPS
zgemm_ Z N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 8.618 ms 116.036 GFLOPS
zgemm_ Z N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 42.717 ms 136.526 GFLOPS
zgemm_ Z N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 124.652 ms 141.001 GFLOPS
zgemm_ Z N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 277.029 ms 141.877 GFLOPS
zgemm_ Z T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 494.360 ms 149.866 GFLOPS
zgemm_ Z T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 803.699 ms 155.531 GFLOPS
cgemm_ C N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 135.321 ms 354.712 GFLOPS
cgemm_ C N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.429 ms 18.648 GFLOPS
cgemm_ C N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 5.045 ms 198.216 GFLOPS
cgemm_ C N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 20.003 ms 291.556 GFLOPS
cgemm_ C N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 56.253 ms 312.446 GFLOPS
cgemm_ C N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 116.948 ms 336.081 GFLOPS
cgemm_ C T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 207.581 ms 356.911 GFLOPS
cgemm_ C T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 346.031 ms 361.239 GFLOPS
sgemm_ S N N 1000 3000 2000 0.900000 0.000000 4000 5000 -1.100000 0.000000 6000 nt=4 1024.360 ms 11.715 GFLOPS
sgemm_ S N N 100 100 100 0.900000 0.000000 104 104 -1.100000 0.000000 104 nt=4 0.362 ms 5.525 GFLOPS
sgemm_ S N N 500 500 500 0.900000 0.000000 504 504 -1.100000 0.000000 504 nt=4 1.688 ms 148.104 GFLOPS
sgemm_ S N N 900 900 900 0.900000 0.000000 904 904 -1.100000 0.000000 904 nt=4 147.791 ms 9.865 GFLOPS
sgemm_ S N N 1300 1300 1300 0.900000 0.000000 1304 1304 -1.100000 0.000000 1304 nt=4 451.156 ms 9.739 GFLOPS
sgemm_ S N T 1700 1700 1700 0.900000 0.000000 1704 1704 -1.100000 0.000000 1704 nt=4 873.577 ms 11.248 GFLOPS
sgemm_ S T N 2100 2100 2100 0.900000 0.000000 2104 2104 -1.100000 0.000000 2104 nt=4 1699.278 ms 10.900 GFLOPS
sgemm_ S T T 2500 2500 2500 0.900000 0.000000 2504 2504 -1.100000 0.000000 2504 nt=4 2651.917 ms 11.784 GFLOPS