mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-02 20:51:23 +00:00
[CK_TILE] layernorm support fused-quant/fused-add (#1604)
* add prenorm/postnorm support, refactor using generate.py * update README * update README * fix format * update some description and fix format * update format * format * use non-raw for loading * format and update n4096 * dynamic-quant ready * update readme * support fused dynamic-quant * update fused-quant, with smooth * update README * update args * update some based on comment
This commit is contained in:
@@ -301,7 +301,10 @@ struct BlockReduce2D
|
||||
.get_static_tile_distribution_encoding(),
|
||||
ReduceDim{}));
|
||||
|
||||
return make_static_distributed_tensor<InDataType>(acc_dstr);
|
||||
auto dst_ = make_static_distributed_tensor<InDataType>(acc_dstr);
|
||||
// init acc_tensor
|
||||
tile_elementwise_inout([&](auto& x_) { x_ = type_convert<InDataType>(reduce_init); }, dst_);
|
||||
return dst_;
|
||||
}
|
||||
|
||||
// return the number of pixels each lane needs to reduce
|
||||
|
||||
Reference in New Issue
Block a user