mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-05 19:40:19 +00:00
Slightly better PP
This commit is contained in:
@@ -1235,7 +1235,9 @@ void launch_fattn_mma(
|
||||
|
||||
const int nblocks_stream_k = max_blocks;
|
||||
|
||||
const bool use_stream_k = cc >= CC_ADA_LOVELACE || tiles_efficiency_percent < 75;
|
||||
//const bool use_stream_k = cc >= CC_ADA_LOVELACE || tiles_efficiency_percent < 75;
|
||||
// On my RTX-4080, the above (always using stream-k when cc >= CC_ADA_LOVELACE) is slightly slower for prompt processing (PP). It would be useful to test what happens on Blackwell.
|
||||
const bool use_stream_k = tiles_efficiency_percent < 75;
|
||||
|
||||
blocks_num.x = use_stream_k ? nblocks_stream_k : ntiles_total;
|
||||
blocks_num.y = 1;
|
||||
|
||||
Reference in New Issue
Block a user