diff --git a/Config.cmake.in b/Config.cmake.in index 02978cd4dd..2861a28f49 100644 --- a/Config.cmake.in +++ b/Config.cmake.in @@ -1,6 +1,6 @@ @PACKAGE_INIT@ -set(_composable_kernel_supported_components device_operations utility) +set(_composable_kernel_supported_components device_other_operations device_gemm_operations device_conv_operations device_mha_operations device_contraction_operations device_reduction_operations utility) foreach(_comp ${composable_kernel_FIND_COMPONENTS}) if(NOT _comp IN_LIST _composable_kernel_supported_components) diff --git a/client_example/01_gemm/CMakeLists.txt b/client_example/01_gemm/CMakeLists.txt index 9e741192f9..6c4103cda8 100644 --- a/client_example/01_gemm/CMakeLists.txt +++ b/client_example/01_gemm/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_gemm gemm.cpp) -target_link_libraries(client_gemm PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations) diff --git a/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt index ba29520222..772b699955 100644 --- a/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt +++ b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt @@ -1,13 +1,13 @@ add_custom_target(client_gemm_fastgelu_examples) add_executable(client_gemm_add_add_fastgelu gemm_add_add_fastgelu.cpp) -target_link_libraries(client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_gemm_operations) add_executable(client_gemm_add_fastgelu gemm_add_fastgelu.cpp) -target_link_libraries(client_gemm_add_fastgelu PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_fastgelu PRIVATE composable_kernel::device_gemm_operations) add_executable(client_gemm_fastgelu gemm_fastgelu.cpp) -target_link_libraries(client_gemm_fastgelu PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations) add_dependencies(client_gemm_fastgelu_examples client_gemm_add_add_fastgelu client_gemm_add_fastgelu client_gemm_fastgelu) @@ -15,13 +15,13 @@ add_dependencies(client_gemm_fastgelu_examples client_gemm_add_add_fastgelu clie add_custom_target(client_gemm_fastgelu_generic_examples) add_executable(client_gemm_add_add_fastgelu_generic gemm_add_add_fastgelu_generic.cpp) -target_link_libraries(client_gemm_add_add_fastgelu_generic PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_add_fastgelu_generic composable_kernel::device_gemm_operations) add_executable(client_gemm_add_fastgelu_generic gemm_add_fastgelu_generic.cpp) -target_link_libraries(client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations) add_executable(client_gemm_fastgelu_generic gemm_fastgelu_generic.cpp) -target_link_libraries(client_gemm_fastgelu_generic PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations) add_dependencies(client_gemm_fastgelu_generic_examples client_gemm_add_add_fastgelu_generic client_gemm_add_fastgelu_generic client_gemm_fastgelu_generic) diff --git a/client_example/03_gemm_layernorm/CMakeLists.txt b/client_example/03_gemm_layernorm/CMakeLists.txt index b38698d906..94b4576f64 100644 --- a/client_example/03_gemm_layernorm/CMakeLists.txt +++ b/client_example/03_gemm_layernorm/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp) -target_link_libraries(client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations) add_executable(client_gemm_add_relu_add_layernorm_welford gemm_add_relu_add_layernorm_welford.cpp) -target_link_libraries(client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations) diff --git a/client_example/04_contraction/CMakeLists.txt b/client_example/04_contraction/CMakeLists.txt index 7ffedfeef3..cd4a95124c 100644 --- a/client_example/04_contraction/CMakeLists.txt +++ b/client_example/04_contraction/CMakeLists.txt @@ -1,15 +1,15 @@ add_executable(client_contraction_scale_fp32 contraction_scale_fp32.cpp) -target_link_libraries(client_contraction_scale_fp32 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_contraction_scale_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations) add_executable(client_contraction_bilinear_fp32 contraction_bilinear_fp32.cpp) -target_link_libraries(client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations) add_executable(client_contraction_scale_fp64 contraction_scale_fp64.cpp) -target_link_libraries(client_contraction_scale_fp64 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_contraction_scale_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations) add_executable(client_contraction_bilinear_fp64 contraction_bilinear_fp64.cpp) -target_link_libraries(client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations) add_executable(contraction_g1m2n3k1_add_xdl_fp16 contraction_g1m2n3k1_add_xdl_fp16.cpp) -target_link_libraries(contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_operations) +target_link_libraries(contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations) diff --git a/client_example/05_layernorm/CMakeLists.txt b/client_example/05_layernorm/CMakeLists.txt index 642eae16d3..9cbfc2b763 100644 --- a/client_example/05_layernorm/CMakeLists.txt +++ b/client_example/05_layernorm/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_layernorm2d_fwd layernorm2d_fwd.cpp) -target_link_libraries(client_layernorm2d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_layernorm2d_fwd PRIVATE composable_kernel::device_other_operations) add_executable(client_layernorm4d_fwd layernorm4d_fwd.cpp) -target_link_libraries(client_layernorm4d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_layernorm4d_fwd PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/06_softmax/CMakeLists.txt b/client_example/06_softmax/CMakeLists.txt index b38a0fd9e2..24d30f475e 100644 --- a/client_example/06_softmax/CMakeLists.txt +++ b/client_example/06_softmax/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_softmax4d softmax4d.cpp) -target_link_libraries(client_softmax4d PRIVATE composable_kernel::device_operations) +target_link_libraries(client_softmax4d PRIVATE composable_kernel::device_other_operations composable_kernel::device_reduction_operations) diff --git a/client_example/07_grouped_convnd_fwd/CMakeLists.txt b/client_example/07_grouped_convnd_fwd/CMakeLists.txt index fce7e91c1e..40f1bba064 100644 --- a/client_example/07_grouped_convnd_fwd/CMakeLists.txt +++ b/client_example/07_grouped_convnd_fwd/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_grouped_conv2d_fwd grouped_conv2d_fwd.cpp) -target_link_libraries(client_grouped_conv2d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv2d_fwd PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp) -target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/08_fused_attention/CMakeLists.txt b/client_example/08_fused_attention/CMakeLists.txt index 862b9ed5b7..9472be07b5 100644 --- a/client_example/08_fused_attention/CMakeLists.txt +++ b/client_example/08_fused_attention/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_fused_attention fused_attention.cpp) -target_link_libraries(client_fused_attention PRIVATE composable_kernel::device_operations) +target_link_libraries(client_fused_attention PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_fused_attention_bias fused_attention_bias.cpp) -target_link_libraries(client_fused_attention_bias PRIVATE composable_kernel::device_operations) +target_link_libraries(client_fused_attention_bias PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations) diff --git a/client_example/09_quantization/CMakeLists.txt b/client_example/09_quantization/CMakeLists.txt index ac11aad45d..65ad642ce2 100644 --- a/client_example/09_quantization/CMakeLists.txt +++ b/client_example/09_quantization/CMakeLists.txt @@ -1,22 +1,22 @@ if(DTYPES MATCHES "int8" OR NOT DEFINED DTYPES) add_executable(client_conv2d_fwd_bias_tanh_perchannel_quantization conv2d_fwd_bias_tanh_perchannel_quantization.cpp) -target_link_libraries(client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_conv2d_fwd_bias_relu_perchannel_quantization conv2d_fwd_bias_relu_perchannel_quantization.cpp) -target_link_libraries(client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_conv2d_fwd_bias_tanh_perlayer_quantization conv2d_fwd_bias_tanh_perlayer_quantization.cpp) -target_link_libraries(client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_conv2d_fwd_bias_relu_perlayer_quantization conv2d_fwd_bias_relu_perlayer_quantization.cpp) -target_link_libraries(client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_conv2d_fwd_perchannel_quantization conv2d_fwd_perchannel_quantization.cpp) -target_link_libraries(client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_conv2d_fwd_perlayer_quantization conv2d_fwd_perlayer_quantization.cpp) -target_link_libraries(client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) add_executable(client_gemm_quantization gemm_quantization.cpp) -target_link_libraries(client_gemm_quantization PRIVATE composable_kernel::device_operations) +target_link_libraries(client_gemm_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations) endif() diff --git a/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt index 60543c7308..0cf308c6e1 100644 --- a/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt +++ b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt @@ -1,8 +1,8 @@ add_executable(client_grouped_conv2d_bwd_data grouped_conv2d_bwd_data.cpp) -target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp) -target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp) -target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt index b7dfc71826..dddfabb787 100644 --- a/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt +++ b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt @@ -4,8 +4,8 @@ add_executable(client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_f add_executable(client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp) add_executable(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp) -target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations) -target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations) -target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations) -target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_operations) -target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) +target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) +target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations) +target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations) +target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/12_elementwise_normalization/CMakeLists.txt b/client_example/12_elementwise_normalization/CMakeLists.txt index 1ba0e1279a..738647de59 100644 --- a/client_example/12_elementwise_normalization/CMakeLists.txt +++ b/client_example/12_elementwise_normalization/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_elementwise_layernorm2d elementwise_layernorm2d.cpp) -target_link_libraries(client_elementwise_layernorm2d PRIVATE composable_kernel::device_operations) +target_link_libraries(client_elementwise_layernorm2d PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/13_batchnorm/CMakeLists.txt b/client_example/13_batchnorm/CMakeLists.txt index fc4f9d395c..420ea25752 100644 --- a/client_example/13_batchnorm/CMakeLists.txt +++ b/client_example/13_batchnorm/CMakeLists.txt @@ -1,6 +1,6 @@ add_executable(client_batchnorm_fwd_nhwc batchnorm_fwd_nhwc.cpp) add_executable(client_batchnorm_bwd_nhwc batchnorm_bwd_nhwc.cpp) add_executable(client_batchnorm_infer_nhwc batchnorm_infer_nhwc.cpp) -target_link_libraries(client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_operations) -target_link_libraries(client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_operations) -target_link_libraries(client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_operations) +target_link_libraries(client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_other_operations) +target_link_libraries(client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_other_operations) +target_link_libraries(client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/14_instance_id/CMakeLists.txt b/client_example/14_instance_id/CMakeLists.txt index 87b2a9a0cb..6ba0e59f5a 100644 --- a/client_example/14_instance_id/CMakeLists.txt +++ b/client_example/14_instance_id/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_batchnorm_fwd_instance_id batchnorm_fwd_instance_id.cpp) -target_link_libraries(client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_operations) +target_link_libraries(client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/15_convnd_bwd_data/CMakeLists.txt b/client_example/15_convnd_bwd_data/CMakeLists.txt index 8a60a71674..f35cd82d79 100644 --- a/client_example/15_convnd_bwd_data/CMakeLists.txt +++ b/client_example/15_convnd_bwd_data/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp) add_executable(client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp) -target_link_libraries(client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_operations) -target_link_libraries(client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_conv_operations) +target_link_libraries(client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/15_gemm_add_multiply/CMakeLists.txt b/client_example/15_gemm_add_multiply/CMakeLists.txt index fd2dcf9614..4b4d762003 100644 --- a/client_example/15_gemm_add_multiply/CMakeLists.txt +++ b/client_example/15_gemm_add_multiply/CMakeLists.txt @@ -1,3 +1,3 @@ add_executable(client_gemm_add_multiply gemm_add_multiply.cpp) -target_link_libraries(client_gemm_add_multiply PRIVATE composable_kernel::device_operations) \ No newline at end of file +target_link_libraries(client_gemm_add_multiply PRIVATE composable_kernel::device_gemm_operations) \ No newline at end of file diff --git a/client_example/15_reduce/CMakeLists.txt b/client_example/15_reduce/CMakeLists.txt index d52675ba83..a944af5e54 100644 --- a/client_example/15_reduce/CMakeLists.txt +++ b/client_example/15_reduce/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_reduce_nhwc_c reduce_nhwc_c.cpp) -target_link_libraries(client_reduce_nhwc_c PRIVATE composable_kernel::device_operations) +target_link_libraries(client_reduce_nhwc_c PRIVATE composable_kernel::device_reduction_operations) diff --git a/client_example/16_convnd_fwd/CMakeLists.txt b/client_example/16_convnd_fwd/CMakeLists.txt index 249c2c030f..5279e3dfcf 100644 --- a/client_example/16_convnd_fwd/CMakeLists.txt +++ b/client_example/16_convnd_fwd/CMakeLists.txt @@ -1,15 +1,15 @@ if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES) add_executable(client_conv3d_fwd_fp16 conv3d_fwd_fp16.cpp) - target_link_libraries(client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_operations) + target_link_libraries(client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_conv_operations) endif() if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES) add_executable(client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp) - target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_operations) + target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations) endif() if((DTYPES MATCHES "fp32") OR NOT DEFINED DTYPES) add_executable(client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp) - target_link_libraries(client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_operations) + target_link_libraries(client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_conv_operations) endif() diff --git a/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt index 659e6769d8..fd315afbd2 100644 --- a/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt +++ b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp) -target_link_libraries(client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_operations) \ No newline at end of file +target_link_libraries(client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations) \ No newline at end of file diff --git a/client_example/18_groupnorm/CMakeLists.txt b/client_example/18_groupnorm/CMakeLists.txt index 17c88cb61b..dee85f9a60 100644 --- a/client_example/18_groupnorm/CMakeLists.txt +++ b/client_example/18_groupnorm/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_groupnorm_swish groupnorm_swish.cpp) -target_link_libraries(client_groupnorm_swish PRIVATE composable_kernel::device_operations) +target_link_libraries(client_groupnorm_swish PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/19_pool/CMakeLists.txt b/client_example/19_pool/CMakeLists.txt index d4e2e6d4dc..861c1a3257 100644 --- a/client_example/19_pool/CMakeLists.txt +++ b/client_example/19_pool/CMakeLists.txt @@ -1,11 +1,11 @@ add_executable(client_max_pool2d_fwd max_pool2d_fwd.cpp) -target_link_libraries(client_max_pool2d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_max_pool2d_fwd PRIVATE composable_kernel::device_other_operations) add_executable(client_max_pool2d_bwd max_pool2d_bwd.cpp) -target_link_libraries(client_max_pool2d_bwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_max_pool2d_bwd PRIVATE composable_kernel::device_other_operations) add_executable(client_avg_pool3d_fwd avg_pool3d_fwd.cpp) -target_link_libraries(client_avg_pool3d_fwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_avg_pool3d_fwd PRIVATE composable_kernel::device_other_operations) add_executable(client_avg_pool3d_bwd avg_pool3d_bwd.cpp) -target_link_libraries(client_avg_pool3d_bwd PRIVATE composable_kernel::device_operations) +target_link_libraries(client_avg_pool3d_bwd PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/20_splitk_gemm/CMakeLists.txt b/client_example/20_splitk_gemm/CMakeLists.txt index 5571ed1d70..a3dc853767 100644 --- a/client_example/20_splitk_gemm/CMakeLists.txt +++ b/client_example/20_splitk_gemm/CMakeLists.txt @@ -1,4 +1,4 @@ if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES) add_executable(client_splitK_gemm splitK_gemm_fp16_f8.cpp) - target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_operations) + target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations) endif() diff --git a/client_example/21_grouped_gemm_bias/CMakeLists.txt b/client_example/21_grouped_gemm_bias/CMakeLists.txt index a2abd15731..92e31495c2 100644 --- a/client_example/21_grouped_gemm_bias/CMakeLists.txt +++ b/client_example/21_grouped_gemm_bias/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_grouped_gemm_fixed_nk_bias_fp16 grouped_gemm_fixed_nk_bias_fp16.cpp) -target_link_libraries(client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_gemm_operations) diff --git a/client_example/22_grouped_gemm/CMakeLists.txt b/client_example/22_grouped_gemm/CMakeLists.txt index 05b9e1e29d..19c613381e 100644 --- a/client_example/22_grouped_gemm/CMakeLists.txt +++ b/client_example/22_grouped_gemm/CMakeLists.txt @@ -1,8 +1,8 @@ add_executable(client_grouped_gemm_fixed_nk_fp16 grouped_gemm_fixed_nk_fp16.cpp) -target_link_libraries(client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_gemm_operations) add_executable(client_grouped_gemm_fixed_nk_fp8 grouped_gemm_fixed_nk_fp8.cpp) -target_link_libraries(client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_gemm_operations) add_executable(client_grouped_gemm_fixed_nk_i8 grouped_gemm_fixed_nk_i8.cpp) -target_link_libraries(client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_gemm_operations) diff --git a/client_example/22_im2col_col2im/CMakeLists.txt b/client_example/22_im2col_col2im/CMakeLists.txt index 47ac42fe87..d938d8cfb3 100644 --- a/client_example/22_im2col_col2im/CMakeLists.txt +++ b/client_example/22_im2col_col2im/CMakeLists.txt @@ -1,5 +1,5 @@ add_executable(client_image_to_column image_to_column.cpp) -target_link_libraries(client_image_to_column PRIVATE composable_kernel::device_operations) +target_link_libraries(client_image_to_column PRIVATE composable_kernel::device_other_operations) add_executable(client_column_to_image column_to_image.cpp) -target_link_libraries(client_column_to_image PRIVATE composable_kernel::device_operations) +target_link_libraries(client_column_to_image PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/23_elementwise_transpose/CMakeLists.txt b/client_example/23_elementwise_transpose/CMakeLists.txt index a457aee16a..6b2421d881 100644 --- a/client_example/23_elementwise_transpose/CMakeLists.txt +++ b/client_example/23_elementwise_transpose/CMakeLists.txt @@ -1,2 +1,2 @@ add_executable(client_elementwise_transpose3d elementwise_transpose_3d.cpp) -target_link_libraries(client_elementwise_transpose3d PRIVATE composable_kernel::device_operations) +target_link_libraries(client_elementwise_transpose3d PRIVATE composable_kernel::device_other_operations) diff --git a/client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt b/client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt index 3a3ed235ac..101a5b97ee 100644 --- a/client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt +++ b/client_example/23_grouped_convnd_fwd_scaleadd_scaleadd_relu/CMakeLists.txt @@ -1,11 +1,11 @@ add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/24_grouped_convnd_fwd_scaleadd_ab/CMakeLists.txt b/client_example/24_grouped_convnd_fwd_scaleadd_ab/CMakeLists.txt index 94a5ad0685..38cd8b1791 100644 --- a/client_example/24_grouped_convnd_fwd_scaleadd_ab/CMakeLists.txt +++ b/client_example/24_grouped_convnd_fwd_scaleadd_ab/CMakeLists.txt @@ -1,11 +1,11 @@ add_executable(client_grouped_convnd_fwd_scaleadd_ab_fp32 grouped_conv_fwd_scaleadd_ab_fp32.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp32 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp32 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_ab_fp16 grouped_conv_fwd_scaleadd_ab_fp16.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp16 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_ab_bf16 grouped_conv_fwd_scaleadd_ab_bf16.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_bf16 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_bf16 PRIVATE composable_kernel::device_conv_operations) add_executable(client_grouped_convnd_fwd_scaleadd_ab_int8 grouped_conv_fwd_scaleadd_ab_int8.cpp) -target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_int8 PRIVATE composable_kernel::device_operations) +target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_int8 PRIVATE composable_kernel::device_conv_operations) diff --git a/client_example/CMakeLists.txt b/client_example/CMakeLists.txt index eb793b3cbd..753f5e5ae5 100644 --- a/client_example/CMakeLists.txt +++ b/client_example/CMakeLists.txt @@ -48,7 +48,7 @@ else() endif() endif() -find_package(composable_kernel COMPONENTS device_operations) +find_package(composable_kernel COMPONENTS device_other_operations device_gemm_operations device_conv_operations device_contraction_operations device_reduction_operations) find_package(hip REQUIRED PATHS /opt/rocm) message(STATUS "Build with HIP ${hip_VERSION}") diff --git a/example/27_layernorm2d_fwd/run_layernorm_example.inc b/example/27_layernorm2d_fwd/run_layernorm_example.inc index 02b60fe548..23608a1eea 100644 --- a/example/27_layernorm2d_fwd/run_layernorm_example.inc +++ b/example/27_layernorm2d_fwd/run_layernorm_example.inc @@ -44,9 +44,9 @@ int run_layernorm2d_fwd_example() {0, 1}, std::vector{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, {1}, 1e-4, x_dev.GetDeviceBuffer(), diff --git a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc index ab6f317bc6..853ff791a6 100644 --- a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc +++ b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc @@ -65,9 +65,9 @@ int run_groupnorm_fwd_example(int argc, char* argv[]) {0, 0, 0, C, 1}, std::vector{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, {1, 2, 4}, // reduction dimension: [H, W, C] 1e-6, x_dev.GetDeviceBuffer(), diff --git a/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc b/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc index f75c01ec61..1a0b558e2c 100644 --- a/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc +++ b/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc @@ -46,9 +46,9 @@ int run_layernorm4d_fwd_example() {0, W * C, C, 1}, std::vector{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, std::vector{save_mean.mDesc.GetStrides().begin(), - save_mean.mDesc.GetStrides().end()}, + save_mean.mDesc.GetStrides().end()}, {1, 2, 3}, 1e-4, x_dev.GetDeviceBuffer(), diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index 9cb5d0e9aa..ac01c1b416 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -58,7 +58,12 @@ endfunction(add_instance_library INSTANCE_NAME) file(GLOB dir_list LIST_DIRECTORIES true *) -set(CK_DEVICE_INSTANCES) +set(CK_DEVICE_OTHER_INSTANCES) +set(CK_DEVICE_GEMM_INSTANCES) +set(CK_DEVICE_CONV_INSTANCES) +set(CK_DEVICE_MHA_INSTANCES) +set(CK_DEVICE_CONTRACTION_INSTANCES) +set(CK_DEVICE_REDUCTION_INSTANCES) FOREACH(subdir_path ${dir_list}) set(target_dir) IF(IS_DIRECTORY "${subdir_path}") @@ -122,7 +127,19 @@ FOREACH(subdir_path ${dir_list}) if((add_inst EQUAL 1)) get_filename_component(target_dir ${subdir_path} NAME) add_subdirectory(${target_dir}) - list(APPEND CK_DEVICE_INSTANCES $) + if("${cmake_instance}" MATCHES "gemm") + list(APPEND CK_DEVICE_GEMM_INSTANCES $) + elseif("${cmake_instance}" MATCHES "conv") + list(APPEND CK_DEVICE_CONV_INSTANCES $) + elseif("${cmake_instance}" MATCHES "mha") + list(APPEND CK_DEVICE_MHA_INSTANCES $) + elseif("${cmake_instance}" MATCHES "contr") + list(APPEND CK_DEVICE_CONTRACTION_INSTANCES $) + elseif("${cmake_instance}" MATCHES "reduce") + list(APPEND CK_DEVICE_REDUCTION_INSTANCES $) + else() + list(APPEND CK_DEVICE_OTHER_INSTANCES $) + endif() message("add_instance_directory ${subdir_path}") else() message("skip_instance_directory ${subdir_path}") @@ -130,50 +147,138 @@ FOREACH(subdir_path ${dir_list}) ENDIF() ENDFOREACH() -add_library(device_operations STATIC ${CK_DEVICE_INSTANCES}) -add_library(composablekernels::device_operations ALIAS device_operations) +if(CK_DEVICE_OTHER_INSTANCES) + add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES}) + add_library(composablekernels::device_other_operations ALIAS device_other_operations) + target_compile_features(device_other_operations PUBLIC) + set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_other_operations PUBLIC + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + $ + ) + rocm_install(TARGETS device_other_operations + EXPORT device_other_operationsTargets) + rocm_install(EXPORT device_other_operationsTargets + FILE composable_kerneldevice_other_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() +if(CK_DEVICE_GEMM_INSTANCES) + add_library(device_gemm_operations STATIC ${CK_DEVICE_GEMM_INSTANCES}) + add_library(composablekernels::device_gemm_operations ALIAS device_gemm_operations) + target_compile_features(device_gemm_operations PUBLIC) + set_target_properties(device_gemm_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_gemm_operations PUBLIC + $ + ) + rocm_install(TARGETS device_gemm_operations + EXPORT device_gemm_operationsTargets) + rocm_install(EXPORT device_gemm_operationsTargets + FILE composable_kerneldevice_gemm_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() +if(CK_DEVICE_CONV_INSTANCES) + add_library(device_conv_operations STATIC ${CK_DEVICE_CONV_INSTANCES}) + add_library(composablekernels::device_conv_operations ALIAS device_conv_operations) + target_compile_features(device_conv_operations PUBLIC) + set_target_properties(device_conv_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_conv_operations PUBLIC + $ + $ + $ + $ + $ + ) + rocm_install(TARGETS device_conv_operations + EXPORT device_conv_operationsTargets) + rocm_install(EXPORT device_conv_operationsTargets + FILE composable_kerneldevice_conv_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() +if(CK_DEVICE_MHA_INSTANCES) + add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES}) + add_library(composablekernels::device_mha_operations ALIAS device_mha_operations) + target_compile_features(device_mha_operations PUBLIC) + set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_mha_operations PUBLIC + $ + ) + rocm_install(TARGETS device_mha_operations + EXPORT device_mha_operationsTargets) + rocm_install(EXPORT device_mha_operationsTargets + FILE composable_kerneldevice_mha_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() +if(CK_DEVICE_CONTRACTION_INSTANCES) + add_library(device_contraction_operations STATIC ${CK_DEVICE_CONTRACTION_INSTANCES}) + add_library(composablekernels::device_contraction_operations ALIAS device_contraction_operations) + target_compile_features(device_contraction_operations PUBLIC) + set_target_properties(device_contraction_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_contraction_operations PUBLIC + $ + $ + ) + rocm_install(TARGETS device_contraction_operations + EXPORT device_contraction_operationsTargets) + rocm_install(EXPORT device_contraction_operationsTargets + FILE composable_kerneldevice_contraction_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() +if(CK_DEVICE_REDUCTION_INSTANCES) + add_library(device_reduction_operations STATIC ${CK_DEVICE_REDUCTION_INSTANCES}) + add_library(composablekernels::device_reduction_operations ALIAS device_reduction_operations) + target_compile_features(device_reduction_operations PUBLIC) + set_target_properties(device_reduction_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) + target_include_directories(device_reduction_operations PUBLIC + $ + ) + rocm_install(TARGETS device_reduction_operations + EXPORT device_reduction_operationsTargets) + rocm_install(EXPORT device_reduction_operationsTargets + FILE composable_kerneldevice_reduction_operationsTargets.cmake + NAMESPACE composable_kernel:: + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel + ) +endif() + +add_library(device_operations INTERFACE) +target_link_libraries(device_operations INTERFACE + device_contraction_operations + device_conv_operations + device_gemm_operations + device_other_operations + device_reduction_operations + utility) + set(DEV_OPS_INC_DIRS ${PROJECT_SOURCE_DIR}/include/ck/ ${PROJECT_SOURCE_DIR}/library/include/ck/ ) - -target_compile_features(device_operations PUBLIC) -set_target_properties(device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) -target_include_directories(device_operations PUBLIC - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ - $ -) - -#once new arches are enabled make this an option on the main cmake file -# and pass down here to be exported -target_compile_options(device_operations PRIVATE - --offload-arch=gfx908 - --offload-arch=gfx90a -) - -# install(TARGETS device_operations LIBRARY DESTINATION lib) -rocm_install(TARGETS device_operations - EXPORT device_operationsTargets) - rocm_install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck) -rocm_install(EXPORT device_operationsTargets - FILE composable_kerneldevice_operationsTargets.cmake - NAMESPACE composable_kernel:: - DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel -) + diff --git a/test/transpose/test_transpose_ut_cases.inc b/test/transpose/test_transpose_ut_cases.inc index 8dd37a71b3..59a2a6c72c 100644 --- a/test/transpose/test_transpose_ut_cases.inc +++ b/test/transpose/test_transpose_ut_cases.inc @@ -14,7 +14,6 @@ TYPED_TEST(TestTranspose, Test1) this->Run(); } - TYPED_TEST(TestTranpose, Test2) { std::vector Ms{127, 255, 312, 799, 1573}; @@ -27,4 +26,3 @@ TYPED_TEST(TestTranpose, Test2) this->Run(); } -