mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-01 20:21:23 +00:00
* enable gfx940
* switch between intrinsic mfma routines on mi100/200 and mi300
* fix mfma_int8 on MI300
* disable 2 int8 examples on MI300
* Update cmake-ck-dev.sh
* restore gitignore file
* modify Jenkinsfile to the internal repo
* Bump rocm-docs-core from 0.24.0 to 0.29.0 in /docs/sphinx
Bumps [rocm-docs-core](https://github.com/RadeonOpenCompute/rocm-docs-core) from 0.24.0 to 0.29.0.
- [Release notes](https://github.com/RadeonOpenCompute/rocm-docs-core/releases)
- [Changelog](https://github.com/RadeonOpenCompute/rocm-docs-core/blob/develop/CHANGELOG.md)
- [Commits](https://github.com/RadeonOpenCompute/rocm-docs-core/compare/v0.24.0...v0.29.0)
---
updated-dependencies:
- dependency-name: rocm-docs-core
dependency-type: direct:production
update-type: version-update:semver-minor
...
Signed-off-by: dependabot[bot] <support@github.com>
* initial enablement of gfx950
* fix clang format
* disable examples 31 and 41 int8 on gfx950
* add code
* fix build wip
* fix xx
* now can build
* naming
* minor fix
* wip fix
* fix macro for exp2; fix warpgemm a/b in transposedC
* unify as tuple_array
* Update the required Python version to 3.9
* Update executable name in test scripts
* re-structure tuple/array to avoid spill
* Merge function templates
* Fix format
* Add constraint to array<> ctor
* Re-use function
* Some minor changes
* remove wrong code in store_raw()
* fix compile issue in transpose
* Rename enum
Rename 'cood_transform_enum' to 'coord_transform_enum'
* let more integral_constant->constant, and formatting
* make sure thread_buffer can be tuple/array
* temp fix buffer_store spill
* not using custom data type by default, now we can have ISA-level same code as opt_padding
* fix compile error, fp8 not ready now
* fix fp8 duplicated move/shift/and/or problem
* Default use CK_TILE_FLOAT_TO_FP8_STOCHASTIC rounding mode
* fix scratch in fp8 kernel
* update some readme
* fix merge from upstream
* sync with upstream
* sync upstream again
* sync 22
* remove unused
* fix clang-format
* update README of ck_tile example
* fix several issue
* let python version to be 3.8 as minimal
* remove ck_tile example from default cmake target like all/install/check
* remove mistake
* 1) support recipe in generate.py 2) use simplified mask type 3) change left/right to pass into karg
* fix some bug in group-mode masking and codegen. update README
* F8 quantization for FMHA forward (#1224)
* Add SAccElementFunction, PComputeElementFunction, OAccElementFunction in pipeline
* Add element function to fmha api
* Adjust P elementwise function
* Fix bug of elementwise op, our elementwise op is not inout
* Add some elementwise op, prepare to quantization
* Let generate.py can generate different elementwise function
* To prevent compiler issue, remove the elementwise function we have not used.
* Remove f8 pipeline, we should share the same pipeline even in f8
* Remove remove_cvref_t
* Avoid warning
* Fix wrong fp8 QK/KV block gemm setting
* Check fp8 rounding error in check_err()
* Set fp8 rounding error for check_err()
* Use CK_TILE_FLOAT_TO_FP8_STANDARD as default fp8 rounding mode
* 1. codegen the f8 api and kernel
2. f8 host code
* prevent warning in filter mode
* Remove not-in-use elementwise function kargs
* Remove more not-in-use elementwise function kargs
* Small refinements in C++ source files
* Use conditional_t<> to simplify code
* Support heterogeneous argument for binary function types
* Re-use already-existing scales<> functor template
* Fix wrong value produced by saturating
* Generalize the composes<> template
* Unify saturates<> implementation
* Fix type errors in composes<>
* Extend less_equal<>
* Reuse the existing template less_equal<> in check_err()
* Add equal<float> & equal<double>
* Rename check_err() parameter
* Rename check_err() parameter
* Add FIXME comment for adding new macro in future
* Remove unnecessary cast to void
* Eliminate duplicated code
* Avoid dividing api pool into more than 2 groups
* Use more clear variable names
* Use affirmative condition in if stmt
* Remove blank lines
* Do not perfect-forward in composes<>
* To fix compile error, revert generate.py back to 4439cc107d
* Fix bug of p element function
* Add compute element op to host softmax
* Remove element function in api interface
* Extract user parameter
* Rename pscale and oscale variable
* rename f8 to fp8
* rename more f8 to fp8
* Add pipeline::operator() without element_functor
* 1. Remove deprecated pipeline enum
2. Refine host code parameter
* Use quantization range as input
* 1. Rename max_dtype to dtype_max.
2. Rename scale to scale_s
3. Add init description
* Refine description
* prevent early return
* unify _squant kernel name in cpp, update README
* Adjust the default range.
* Refine error message and bias range
* Add fp8 benchmark and smoke test
* fix fp8 swizzle_factor=4 case
---------
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
Co-authored-by: carlushuang <carlus.huang@amd.com>
---------
Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: Jing Zhang <jizha@amd.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Po-Yen, Chen <PoYen.Chen@amd.com>
Co-authored-by: rocking <ChunYu.Lai@amd.com>
160 lines
5.4 KiB
C++
160 lines
5.4 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#pragma once
|
|
|
|
#include <cstdlib>
#include <ostream>
#include <stdexcept>
#include <string>

#include "ck_tile/core.hpp"
#include "ck_tile/ops/fmha.hpp"
|
|
|
|
// keep this in sync with ck_tile::GenericAttentionMaskEnum
|
|
// Kind of attention mask selected on the command line.
// The numeric values are spelled out because mask_info::decode() converts a
// plain integer argument directly into this enum.
enum class mask_enum
{
    no_mask           = 0, // serialized as "n"
    mask_top_left     = 1, // serialized as "t(left:right)"
    mask_bottom_right = 2, // serialized as "b(left:right)"
    window_generic    = 3, // serialized as "g(y:x)"
};
|
|
|
|
struct mask_info
|
|
{
|
|
mask_enum type;
|
|
ck_tile::index_t y, x;
|
|
ck_tile::index_t left, right; // FA style SWA left/right
|
|
|
|
void serialize(std::ostream& os) const
|
|
{
|
|
if(type == mask_enum::no_mask)
|
|
os << "n";
|
|
else if(type == mask_enum::mask_top_left)
|
|
os << "t(" << left << ":" << right << ")";
|
|
else if(type == mask_enum::mask_bottom_right)
|
|
os << "b(" << left << ":" << right << ")";
|
|
else
|
|
{
|
|
os << "g(" << y << ":" << x << ")";
|
|
}
|
|
}
|
|
static mask_info decode(std::string str, ck_tile::index_t seqlen_q, ck_tile::index_t seqlen_k)
|
|
{
|
|
ck_tile::index_t x_total = seqlen_k;
|
|
ck_tile::index_t y_total = seqlen_q;
|
|
mask_info tmp;
|
|
auto found_0 = str.find(':');
|
|
if(found_0 != std::string::npos)
|
|
{
|
|
std::string t = str.substr(0, found_0);
|
|
std::string v = str.substr(found_0 + 1);
|
|
if(t == "xt" || t == "xb")
|
|
{
|
|
// xformer style sliding window attn from top-left
|
|
ck_tile::index_t window_size = atoi(v.c_str());
|
|
ck_tile::index_t left_size = -1;
|
|
ck_tile::index_t right_size = 0;
|
|
if(window_size > 0)
|
|
{
|
|
left_size = window_size / 2;
|
|
right_size = window_size - 1 - left_size;
|
|
}
|
|
auto r = ck_tile::make_generic_attention_mask_coordinates_from_lr_window(
|
|
left_size, right_size, y_total, x_total, t == "xt");
|
|
|
|
tmp.type = t == "xt" ? mask_enum::mask_top_left : mask_enum::mask_bottom_right;
|
|
tmp.y = r.at(ck_tile::number<0>{});
|
|
tmp.x = r.at(ck_tile::number<1>{});
|
|
tmp.left = left_size;
|
|
tmp.right = right_size;
|
|
}
|
|
else
|
|
{
|
|
auto found_1 = v.find(",");
|
|
if(found_1 == std::string::npos)
|
|
{
|
|
printf("not supported value %s, %s\n", v.c_str(), str.c_str());
|
|
assert(0);
|
|
}
|
|
tmp.type = mask_enum::window_generic;
|
|
ck_tile::index_t v0 = atoi(v.substr(0, found_1).c_str());
|
|
ck_tile::index_t v1 = atoi(v.substr(found_1 + 1).c_str());
|
|
// TODO: some validation
|
|
if(t == "t")
|
|
{
|
|
tmp.type = mask_enum::mask_top_left;
|
|
auto r = ck_tile::make_generic_attention_mask_coordinates_from_lr_window(
|
|
v0, v1, y_total, x_total, true);
|
|
tmp.y = r.at(ck_tile::number<0>{});
|
|
tmp.x = r.at(ck_tile::number<1>{});
|
|
tmp.left = v0;
|
|
tmp.right = v1;
|
|
}
|
|
else if(t == "b")
|
|
{
|
|
tmp.type = mask_enum::mask_bottom_right;
|
|
auto r = ck_tile::make_generic_attention_mask_coordinates_from_lr_window(
|
|
v0, v1, y_total, x_total, false);
|
|
tmp.y = r.at(ck_tile::number<0>{});
|
|
tmp.x = r.at(ck_tile::number<1>{});
|
|
tmp.left = v0;
|
|
tmp.right = v1;
|
|
}
|
|
else if(t == "g")
|
|
{
|
|
tmp.y = v0;
|
|
tmp.x = v1;
|
|
tmp.left = v0; // TODO: don't use this?
|
|
tmp.right = v1;
|
|
}
|
|
else
|
|
{
|
|
printf("not supported type %s, %s\n", t.c_str(), str.c_str());
|
|
assert(0);
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
auto set_causal_top_left = [&]() {
|
|
tmp.type = mask_enum::mask_top_left;
|
|
tmp.y = seqlen_q;
|
|
tmp.x = 1;
|
|
tmp.left = -1;
|
|
tmp.right = 0;
|
|
};
|
|
auto set_causal_bottom_right = [&]() {
|
|
tmp.type = mask_enum::mask_bottom_right;
|
|
tmp.y = seqlen_q;
|
|
tmp.x = seqlen_k - seqlen_q + 1;
|
|
tmp.left = -1;
|
|
tmp.right = 0;
|
|
};
|
|
if(str == "t")
|
|
set_causal_top_left();
|
|
else if(str == "b")
|
|
set_causal_bottom_right();
|
|
else
|
|
{
|
|
tmp.type = static_cast<mask_enum>(atoi(str.c_str()));
|
|
if(tmp.type == mask_enum::mask_top_left)
|
|
{
|
|
set_causal_top_left();
|
|
}
|
|
else if(tmp.type == mask_enum::mask_bottom_right)
|
|
{
|
|
set_causal_bottom_right();
|
|
}
|
|
}
|
|
}
|
|
return tmp;
|
|
}
|
|
|
|
friend std::ostream& operator<<(std::ostream& os, const mask_info& mi);
|
|
};
|
|
|
|
inline std::ostream& operator<<(std::ostream& os, const mask_info& mi)
|
|
{
|
|
mi.serialize(os);
|
|
return os;
|
|
}
|