diff --git a/example/ck_tile/18_flatmm/flatmm_basic.cpp b/example/ck_tile/18_flatmm/flatmm_basic.cpp index 540aebb3ed..e1a3b6054c 100644 --- a/example/ck_tile/18_flatmm/flatmm_basic.cpp +++ b/example/ck_tile/18_flatmm/flatmm_basic.cpp @@ -27,46 +27,18 @@ float flatmm_calc(const ck_tile::FlatmmHostArgs& args, const ck_tile::stream_con constexpr bool kPadK = false; constexpr int kBlockPerCu = 1; -#if defined(USING_MFMA_16x16x128) - constexpr ck_tile::index_t M_Tile = 128; - constexpr ck_tile::index_t N_Tile = 256; - constexpr ck_tile::index_t K_Tile = 256; - constexpr ck_tile::index_t M_Warp = 1; - constexpr ck_tile::index_t N_Warp = 4; - constexpr ck_tile::index_t K_Warp = 1; + constexpr ck_tile::index_t M_Tile = GemmConfig::M_Tile; /* The nine tile-shape constants in this function are read from GemmConfig, which is defined in flatmm_basic.hpp. */ + constexpr ck_tile::index_t N_Tile = GemmConfig::N_Tile; + constexpr ck_tile::index_t K_Tile = GemmConfig::K_Tile; - constexpr ck_tile::index_t M_Warp_Tile = 16; - constexpr ck_tile::index_t N_Warp_Tile = 16; - constexpr ck_tile::index_t K_Warp_Tile = 128; -#endif - // This part comes from the Codegen -#if defined(USING_MFMA_16x16x32) || defined(ENABLE_FP16) - constexpr ck_tile::index_t M_Tile = 128; - constexpr ck_tile::index_t N_Tile = 128; - constexpr ck_tile::index_t K_Tile = 128; + constexpr ck_tile::index_t M_Warp = GemmConfig::M_Warp; + constexpr ck_tile::index_t N_Warp = GemmConfig::N_Warp; + constexpr ck_tile::index_t K_Warp = GemmConfig::K_Warp; - constexpr ck_tile::index_t M_Warp = 1; - constexpr ck_tile::index_t N_Warp = 4; - constexpr ck_tile::index_t K_Warp = 1; - - constexpr ck_tile::index_t M_Warp_Tile = is_8bit_type::value ? 16 : 32; - constexpr ck_tile::index_t N_Warp_Tile = is_8bit_type::value ? 16 : 32; - constexpr ck_tile::index_t K_Warp_Tile = is_8bit_type::value ? 
64 : 16; - -#elif defined(USING_MFMA_32x32x16) && defined(ENABLE_FP8) - constexpr ck_tile::index_t M_Tile = 128; - constexpr ck_tile::index_t N_Tile = 256; - constexpr ck_tile::index_t K_Tile = 128; - - constexpr ck_tile::index_t M_Warp = 1; - constexpr ck_tile::index_t N_Warp = 8; - constexpr ck_tile::index_t K_Warp = 1; - - constexpr ck_tile::index_t M_Warp_Tile = is_8bit_type::value ? 32 : 32; - constexpr ck_tile::index_t N_Warp_Tile = is_8bit_type::value ? 32 : 32; - constexpr ck_tile::index_t K_Warp_Tile = is_8bit_type::value ? 32 : 16; -#endif + constexpr ck_tile::index_t M_Warp_Tile = GemmConfig::M_Warp_Tile; + constexpr ck_tile::index_t N_Warp_Tile = GemmConfig::N_Warp_Tile; + constexpr ck_tile::index_t K_Warp_Tile = GemmConfig::K_Warp_Tile; using Traits = ck_tile::TileGemmTraits; diff --git a/example/ck_tile/18_flatmm/flatmm_basic.hpp b/example/ck_tile/18_flatmm/flatmm_basic.hpp index c1ea33c110..d74e396830 100644 --- a/example/ck_tile/18_flatmm/flatmm_basic.hpp +++ b/example/ck_tile/18_flatmm/flatmm_basic.hpp @@ -31,21 +31,6 @@ #error "unsupported CK_TILE_PIPELINE_DEFAULT value" #endif -template -struct GemmConfig -{ - static constexpr ck_tile::index_t M_Tile = 128; - static constexpr ck_tile::index_t N_Tile = 256; - static constexpr ck_tile::index_t K_Tile = 256; - - static constexpr ck_tile::index_t M_Warp = 1; - static constexpr ck_tile::index_t N_Warp = 4; - static constexpr ck_tile::index_t K_Warp = 1; - - static constexpr ck_tile::index_t M_Warp_Tile = 16; - static constexpr ck_tile::index_t N_Warp_Tile = 16; - static constexpr ck_tile::index_t K_Warp_Tile = 128; -}; template struct GemmBasicTypeConfig; @@ -124,6 +109,61 @@ struct is_8bit_type { }; +template +struct GemmConfig +{ /* Exposes nine static constexpr ck_tile::index_t constants (M/N/K_Tile, M/N/K_Warp, M/N/K_Warp_Tile); the USING_MFMA_* and ENABLE_* macros choose which set of values is compiled in. */ +#if defined(USING_MFMA_16x16x128) + static constexpr ck_tile::index_t M_Tile = 128; + static constexpr ck_tile::index_t N_Tile = 256; + static constexpr ck_tile::index_t K_Tile = 256; + + static constexpr ck_tile::index_t M_Warp = 1; + static constexpr ck_tile::index_t 
N_Warp = 4; + static constexpr ck_tile::index_t K_Warp = 1; + + static constexpr ck_tile::index_t M_Warp_Tile = 16; + static constexpr ck_tile::index_t N_Warp_Tile = 16; + static constexpr ck_tile::index_t K_Warp_Tile = 128; +#elif defined(USING_MFMA_16x16x32) || defined(ENABLE_FP16) + static constexpr ck_tile::index_t M_Tile = 64; /* NOTE(review): flatmm_calc used M_Tile = N_Tile = 128 under this same condition -- confirm 64 is intended. */ + static constexpr ck_tile::index_t N_Tile = 64; /* NOTE(review): when is_8bit_type::value is false, N_Warp * N_Warp_Tile = 4 * 32 = 128, which exceeds N_Tile = 64 -- confirm the tile shape accepts this. */ + static constexpr ck_tile::index_t K_Tile = 128; + + static constexpr ck_tile::index_t M_Warp = 1; + static constexpr ck_tile::index_t N_Warp = 4; + static constexpr ck_tile::index_t K_Warp = 1; + + static constexpr ck_tile::index_t M_Warp_Tile = is_8bit_type::value ? 16 : 32; + static constexpr ck_tile::index_t N_Warp_Tile = is_8bit_type::value ? 16 : 32; + static constexpr ck_tile::index_t K_Warp_Tile = is_8bit_type::value ? 64 : 16; + +#elif defined(USING_MFMA_32x32x16) && defined(ENABLE_FP8) + static constexpr ck_tile::index_t M_Tile = 128; + static constexpr ck_tile::index_t N_Tile = 256; + static constexpr ck_tile::index_t K_Tile = 128; + + static constexpr ck_tile::index_t M_Warp = 1; + static constexpr ck_tile::index_t N_Warp = 8; + static constexpr ck_tile::index_t K_Warp = 1; + + static constexpr ck_tile::index_t M_Warp_Tile = is_8bit_type::value ? 32 : 32; /* Both arms are 32 here and on the next line, so the type check does not change the value. */ + static constexpr ck_tile::index_t N_Warp_Tile = is_8bit_type::value ? 32 : 32; + static constexpr ck_tile::index_t K_Warp_Tile = is_8bit_type::value ? 32 : 16; +#else /* None of the conditions above matched: same values as the USING_MFMA_16x16x128 branch. */ + static constexpr ck_tile::index_t M_Tile = 128; + static constexpr ck_tile::index_t N_Tile = 256; + static constexpr ck_tile::index_t K_Tile = 256; + + static constexpr ck_tile::index_t M_Warp = 1; + static constexpr ck_tile::index_t N_Warp = 4; + static constexpr ck_tile::index_t K_Warp = 1; + + static constexpr ck_tile::index_t M_Warp_Tile = 16; + static constexpr ck_tile::index_t N_Warp_Tile = 16; + static constexpr ck_tile::index_t K_Warp_Tile = 128; +#endif +}; + auto create_args(int argc, char* argv[]) { ck_tile::ArgParser arg_parser;