mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-06 07:51:52 +00:00
Merge branch 'develop' into ck_tile/fa_train
This commit is contained in:
8
Jenkinsfile
vendored
8
Jenkinsfile
vendored
@@ -652,8 +652,8 @@ def process_results(Map conf=[:]){
|
||||
}
|
||||
|
||||
//launch develop branch daily at 23:00 UT in FULL_QA mode and at 19:00 UT with latest staging compiler version
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.1;COMPILER_VERSION=
|
||||
0 21 * * * % ROCMVERSION=6.1;COMPILER_VERSION=;COMPILER_COMMIT=
|
||||
CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;ROCMVERSION=6.1;
|
||||
0 21 * * * % ROCMVERSION=6.1;hipTensor_test=true
|
||||
0 19 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-staging;COMPILER_COMMIT=;USE_SCCACHE=false
|
||||
0 17 * * * % BUILD_DOCKER=true;DL_KERNELS=true;COMPILER_VERSION=amd-mainline-open;COMPILER_COMMIT=;USE_SCCACHE=false
|
||||
0 15 * * * % BUILD_INSTANCES_ONLY=true;RUN_CODEGEN_TESTS=false;RUN_PERFORMANCE_TESTS=false;USE_SCCACHE=false''' : ""
|
||||
@@ -701,8 +701,8 @@ pipeline {
|
||||
description: "Select whether to build DL kernels (default: OFF)")
|
||||
booleanParam(
|
||||
name: "hipTensor_test",
|
||||
defaultValue: true,
|
||||
description: "Use the CK build to verify hipTensor build and tests (default: ON)")
|
||||
defaultValue: false,
|
||||
description: "Use the CK build to verify hipTensor build and tests (default: OFF)")
|
||||
string(
|
||||
name: 'hipTensor_branch',
|
||||
defaultValue: 'mainline',
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
rocm-docs-core==1.2.0
|
||||
rocm-docs-core==1.2.1
|
||||
sphinxcontrib-bibtex==2.6.2
|
||||
|
||||
@@ -103,7 +103,7 @@ requests==2.31.0
|
||||
# via
|
||||
# pygithub
|
||||
# sphinx
|
||||
rocm-docs-core==1.2.0
|
||||
rocm-docs-core==1.2.1
|
||||
# via -r requirements.in
|
||||
six==1.16.0
|
||||
# via
|
||||
|
||||
@@ -59,7 +59,7 @@ struct MultiplyMultiply
|
||||
{
|
||||
const float x0_f = c * d0 * d1;
|
||||
|
||||
e = ck::type_convert<ck::bhalf_t>(x0_f);
|
||||
e = ck::type_convert<ck::half_t>(x0_f);
|
||||
}
|
||||
};
|
||||
|
||||
@@ -95,7 +95,7 @@ int main(int argc, char* argv[])
|
||||
ck::index_t K = 4096;
|
||||
|
||||
ck::index_t StrideA = K;
|
||||
ck::index_t StrideB = N;
|
||||
ck::index_t StrideB = K;
|
||||
ck::index_t StrideD = 0;
|
||||
ck::index_t StrideE = N;
|
||||
|
||||
@@ -164,10 +164,10 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
case 0: break;
|
||||
case 1:
|
||||
a0_m_k.GenerateTensorValue(GeneratorTensor_2<A0DataType>{-5, 5});
|
||||
b0_k_n.GenerateTensorValue(GeneratorTensor_2<B0DataType>{-5, 5});
|
||||
d0_m_n.GenerateTensorValue(GeneratorTensor_2<D0DataType>{-5, 5});
|
||||
d1_m_n.GenerateTensorValue(GeneratorTensor_2<D1DataType>{-5, 5});
|
||||
a0_m_k.GenerateTensorValue(GeneratorTensor_2<A0DataType>{-2, 2});
|
||||
b0_k_n.GenerateTensorValue(GeneratorTensor_2<B0DataType>{0, 2});
|
||||
d0_m_n.GenerateTensorValue(GeneratorTensor_2<D0DataType>{0, 2});
|
||||
d1_m_n.GenerateTensorValue(GeneratorTensor_2<D1DataType>{0, 2});
|
||||
break;
|
||||
default:
|
||||
a0_m_k.GenerateTensorValue(GeneratorTensor_3<A0DataType>{0.0, 1.0});
|
||||
|
||||
@@ -83,7 +83,7 @@ struct DeviceGemmMultiD_Xdl_CShuffle_V3 : public DeviceGemmMultipleD<ALayout,
|
||||
static constexpr index_t NumDTensor = DsDataType::Size();
|
||||
|
||||
// GridwiseGemm
|
||||
using GridwiseGemm = GridwiseGemm_xdl_cshuffle_v3<
|
||||
using GridwiseGemm = GridwiseGemmMultiD_xdl_cshuffle_v3<
|
||||
ALayout,
|
||||
BLayout,
|
||||
DsLayout,
|
||||
|
||||
@@ -146,7 +146,7 @@ template <typename ALayout,
|
||||
typename ComputeTypeB = ComputeTypeA,
|
||||
typename LDSTypeA = ADataType,
|
||||
typename LDSTypeB = BDataType>
|
||||
struct GridwiseGemm_xdl_cshuffle_v3
|
||||
struct GridwiseGemmMultiD_xdl_cshuffle_v3
|
||||
{
|
||||
static constexpr auto I0 = Number<0>{};
|
||||
static constexpr auto I1 = Number<1>{};
|
||||
@@ -690,8 +690,8 @@ struct GridwiseGemm_xdl_cshuffle_v3
|
||||
|
||||
constexpr auto a_lds_block_desc_permuted = transform_tensor_descriptor(
|
||||
a_lds_block_desc,
|
||||
make_tuple(make_xor_transform(make_tuple(Number<MPerBlock / MLdsLayer>{},
|
||||
Number<AK0Number * MLdsLayer>{})),
|
||||
make_tuple(make_xor_with_modulo_transform(make_tuple(
|
||||
Number<MPerBlock / MLdsLayer>{}, Number<AK0Number * MLdsLayer>{})),
|
||||
make_pass_through_transform(AK1Number)),
|
||||
make_tuple(Sequence<1, 0>{}, Sequence<2>{}),
|
||||
make_tuple(Sequence<1, 0>{}, Sequence<2>{}));
|
||||
@@ -756,7 +756,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
|
||||
make_tuple(
|
||||
make_pass_through_transform(Number<KThreadWrite / kfold / KThreadReadPerm>{}),
|
||||
make_pass_through_transform(Number<K0PerThreadWrite>{}),
|
||||
make_xor_transform(
|
||||
make_xor_with_modulo_transform(
|
||||
make_tuple(Number<KThreadReadPerm * M1>{}, Number<kfold * M0 / mpair>{})),
|
||||
make_pass_through_transform(Number<mpair>{}),
|
||||
make_pass_through_transform(AK1Number)),
|
||||
@@ -827,8 +827,8 @@ struct GridwiseGemm_xdl_cshuffle_v3
|
||||
|
||||
constexpr auto b_lds_block_desc_permuted = transform_tensor_descriptor(
|
||||
b_lds_block_desc,
|
||||
make_tuple(make_xor_transform(make_tuple(Number<NPerBlock / NLdsLayer>{},
|
||||
Number<BK0Number * NLdsLayer>{})),
|
||||
make_tuple(make_xor_with_modulo_transform(make_tuple(
|
||||
Number<NPerBlock / NLdsLayer>{}, Number<BK0Number * NLdsLayer>{})),
|
||||
make_pass_through_transform(BK1Number)),
|
||||
make_tuple(Sequence<1, 0>{}, Sequence<2>{}),
|
||||
make_tuple(Sequence<1, 0>{}, Sequence<2>{}));
|
||||
@@ -890,7 +890,7 @@ struct GridwiseGemm_xdl_cshuffle_v3
|
||||
make_tuple(
|
||||
make_pass_through_transform(Number<KThreadWrite / kfold / KThreadReadPerm>{}),
|
||||
make_pass_through_transform(Number<K0PerThreadWrite>{}),
|
||||
make_xor_transform(
|
||||
make_xor_with_modulo_transform(
|
||||
make_tuple(Number<KThreadReadPerm * N1>{}, Number<kfold * N0 / npair>{})),
|
||||
make_pass_through_transform(Number<npair>{}),
|
||||
make_pass_through_transform(BK1Number)),
|
||||
|
||||
Reference in New Issue
Block a user