mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-13 17:55:48 +00:00
Split the static library into several files. (#1044)
* spolit the static library into several
* update lib paths and fix client example
* do not use device_mha_operarions for client examples
* use appropriate libs to link to client examples
* remove the gpu/transpose path from the list
* try fixing clinet examples 3,4,9
* add necessary libs for client examples
* fix the layernorm client example
* fix the client examples 23 and 24
* fix typo
* add interface library and refresh clang format
[ROCm/composable_kernel commit: 7965d66a81]
This commit is contained in:
@@ -1,6 +1,6 @@
|
||||
@PACKAGE_INIT@
|
||||
|
||||
set(_composable_kernel_supported_components device_operations utility)
|
||||
set(_composable_kernel_supported_components device_other_operations device_gemm_operations device_conv_operations device_mha_operations device_contraction_operations device_reduction_operations utility)
|
||||
|
||||
foreach(_comp ${composable_kernel_FIND_COMPONENTS})
|
||||
if(NOT _comp IN_LIST _composable_kernel_supported_components)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_gemm gemm.cpp)
|
||||
target_link_libraries(client_gemm PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
add_custom_target(client_gemm_fastgelu_examples)
|
||||
|
||||
add_executable(client_gemm_add_add_fastgelu gemm_add_add_fastgelu.cpp)
|
||||
target_link_libraries(client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_add_fastgelu PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_gemm_add_fastgelu gemm_add_fastgelu.cpp)
|
||||
target_link_libraries(client_gemm_add_fastgelu PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_fastgelu PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_gemm_fastgelu gemm_fastgelu.cpp)
|
||||
target_link_libraries(client_gemm_fastgelu PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_dependencies(client_gemm_fastgelu_examples client_gemm_add_add_fastgelu client_gemm_add_fastgelu
|
||||
client_gemm_fastgelu)
|
||||
@@ -15,13 +15,13 @@ add_dependencies(client_gemm_fastgelu_examples client_gemm_add_add_fastgelu clie
|
||||
add_custom_target(client_gemm_fastgelu_generic_examples)
|
||||
|
||||
add_executable(client_gemm_add_add_fastgelu_generic gemm_add_add_fastgelu_generic.cpp)
|
||||
target_link_libraries(client_gemm_add_add_fastgelu_generic PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_add_fastgelu_generic composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_gemm_add_fastgelu_generic gemm_add_fastgelu_generic.cpp)
|
||||
target_link_libraries(client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_gemm_fastgelu_generic gemm_fastgelu_generic.cpp)
|
||||
target_link_libraries(client_gemm_fastgelu_generic PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_fastgelu_generic PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_dependencies(client_gemm_fastgelu_generic_examples client_gemm_add_add_fastgelu_generic
|
||||
client_gemm_add_fastgelu_generic client_gemm_fastgelu_generic)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_gemm_add_add_layernorm_naive gemm_add_add_layernorm_naive.cpp)
|
||||
target_link_libraries(client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_add_layernorm_naive PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_gemm_add_relu_add_layernorm_welford gemm_add_relu_add_layernorm_welford.cpp)
|
||||
target_link_libraries(client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_relu_add_layernorm_welford PRIVATE composable_kernel::device_gemm_operations composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
add_executable(client_contraction_scale_fp32 contraction_scale_fp32.cpp)
|
||||
target_link_libraries(client_contraction_scale_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_contraction_scale_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_contraction_bilinear_fp32 contraction_bilinear_fp32.cpp)
|
||||
target_link_libraries(client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_contraction_bilinear_fp32 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_contraction_scale_fp64 contraction_scale_fp64.cpp)
|
||||
target_link_libraries(client_contraction_scale_fp64 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_contraction_scale_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_contraction_bilinear_fp64 contraction_bilinear_fp64.cpp)
|
||||
target_link_libraries(client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_contraction_bilinear_fp64 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(contraction_g1m2n3k1_add_xdl_fp16 contraction_g1m2n3k1_add_xdl_fp16.cpp)
|
||||
target_link_libraries(contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(contraction_g1m2n3k1_add_xdl_fp16 PRIVATE composable_kernel::device_other_operations composable_kernel::device_contraction_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_layernorm2d_fwd layernorm2d_fwd.cpp)
|
||||
target_link_libraries(client_layernorm2d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_layernorm2d_fwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_layernorm4d_fwd layernorm4d_fwd.cpp)
|
||||
target_link_libraries(client_layernorm4d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_layernorm4d_fwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_softmax4d softmax4d.cpp)
|
||||
target_link_libraries(client_softmax4d PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_softmax4d PRIVATE composable_kernel::device_other_operations composable_kernel::device_reduction_operations)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_grouped_conv2d_fwd grouped_conv2d_fwd.cpp)
|
||||
target_link_libraries(client_grouped_conv2d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv2d_fwd PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_conv1d_fwd grouped_conv1d_fwd.cpp)
|
||||
target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv1d_fwd PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_fused_attention fused_attention.cpp)
|
||||
target_link_libraries(client_fused_attention PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_fused_attention PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_fused_attention_bias fused_attention_bias.cpp)
|
||||
target_link_libraries(client_fused_attention_bias PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_fused_attention_bias PRIVATE composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
if(DTYPES MATCHES "int8" OR NOT DEFINED DTYPES)
|
||||
add_executable(client_conv2d_fwd_bias_tanh_perchannel_quantization conv2d_fwd_bias_tanh_perchannel_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_bias_tanh_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_conv2d_fwd_bias_relu_perchannel_quantization conv2d_fwd_bias_relu_perchannel_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_bias_relu_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_conv2d_fwd_bias_tanh_perlayer_quantization conv2d_fwd_bias_tanh_perlayer_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_bias_tanh_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_conv2d_fwd_bias_relu_perlayer_quantization conv2d_fwd_bias_relu_perlayer_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_bias_relu_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_conv2d_fwd_perchannel_quantization conv2d_fwd_perchannel_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_perchannel_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_conv2d_fwd_perlayer_quantization conv2d_fwd_perlayer_quantization.cpp)
|
||||
target_link_libraries(client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv2d_fwd_perlayer_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_gemm_quantization gemm_quantization.cpp)
|
||||
target_link_libraries(client_gemm_quantization PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_quantization PRIVATE composable_kernel::device_conv_operations composable_kernel::device_other_operations composable_kernel::device_gemm_operations)
|
||||
endif()
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
add_executable(client_grouped_conv2d_bwd_data grouped_conv2d_bwd_data.cpp)
|
||||
target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv2d_bwd_data PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_conv3d_bwd_data grouped_conv3d_bwd_data.cpp)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_data PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_data_input_fp16_comp_bf8f8 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -4,8 +4,8 @@ add_executable(client_grouped_conv3d_bwd_weight_fp16 grouped_conv3d_bwd_weight_f
|
||||
add_executable(client_grouped_conv3d_bwd_weight_fp32 grouped_conv3d_bwd_weight_fp32.cpp)
|
||||
add_executable(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp)
|
||||
|
||||
target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_conv1d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
target_link_libraries(client_grouped_conv2d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp32 PRIVATE composable_kernel::device_conv_operations)
|
||||
target_link_libraries(client_grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_elementwise_layernorm2d elementwise_layernorm2d.cpp)
|
||||
target_link_libraries(client_elementwise_layernorm2d PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_elementwise_layernorm2d PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
add_executable(client_batchnorm_fwd_nhwc batchnorm_fwd_nhwc.cpp)
|
||||
add_executable(client_batchnorm_bwd_nhwc batchnorm_bwd_nhwc.cpp)
|
||||
add_executable(client_batchnorm_infer_nhwc batchnorm_infer_nhwc.cpp)
|
||||
target_link_libraries(client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_batchnorm_fwd_nhwc PRIVATE composable_kernel::device_other_operations)
|
||||
target_link_libraries(client_batchnorm_bwd_nhwc PRIVATE composable_kernel::device_other_operations)
|
||||
target_link_libraries(client_batchnorm_infer_nhwc PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_batchnorm_fwd_instance_id batchnorm_fwd_instance_id.cpp)
|
||||
target_link_libraries(client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_batchnorm_fwd_instance_id PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_conv3d_bwd_data_fp16 conv3d_bwd_data_fp16.cpp)
|
||||
add_executable(client_conv3d_bwd_data_fp32 conv3d_bwd_data_fp32.cpp)
|
||||
|
||||
target_link_libraries(client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv3d_bwd_data_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
target_link_libraries(client_conv3d_bwd_data_fp32 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
|
||||
add_executable(client_gemm_add_multiply gemm_add_multiply.cpp)
|
||||
target_link_libraries(client_gemm_add_multiply PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_gemm_add_multiply PRIVATE composable_kernel::device_gemm_operations)
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_reduce_nhwc_c reduce_nhwc_c.cpp)
|
||||
target_link_libraries(client_reduce_nhwc_c PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_reduce_nhwc_c PRIVATE composable_kernel::device_reduction_operations)
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
if((DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
|
||||
add_executable(client_conv3d_fwd_fp16 conv3d_fwd_fp16.cpp)
|
||||
target_link_libraries(client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv3d_fwd_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
endif()
|
||||
|
||||
if((DTYPES MATCHES "fp8") OR NOT DEFINED DTYPES)
|
||||
add_executable(client_conv3d_fwd_fp16_comp_fp8 conv3d_fwd_fp16_comp_fp8.cpp)
|
||||
target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv3d_fwd_fp16_comp_fp8 PRIVATE composable_kernel::device_conv_operations)
|
||||
endif()
|
||||
|
||||
if((DTYPES MATCHES "fp32") OR NOT DEFINED DTYPES)
|
||||
add_executable(client_conv3d_fwd_fp32 conv3d_fwd_fp32.cpp)
|
||||
target_link_libraries(client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_conv3d_fwd_fp32 PRIVATE composable_kernel::device_conv_operations)
|
||||
endif()
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_grouped_gemm_fastgelu grouped_gemm_fastgelu.cpp)
|
||||
target_link_libraries(client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_gemm_fastgelu PRIVATE composable_kernel::device_gemm_operations)
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_groupnorm_swish groupnorm_swish.cpp)
|
||||
target_link_libraries(client_groupnorm_swish PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_groupnorm_swish PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
add_executable(client_max_pool2d_fwd max_pool2d_fwd.cpp)
|
||||
target_link_libraries(client_max_pool2d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_max_pool2d_fwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_max_pool2d_bwd max_pool2d_bwd.cpp)
|
||||
target_link_libraries(client_max_pool2d_bwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_max_pool2d_bwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_avg_pool3d_fwd avg_pool3d_fwd.cpp)
|
||||
target_link_libraries(client_avg_pool3d_fwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_avg_pool3d_fwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_avg_pool3d_bwd avg_pool3d_bwd.cpp)
|
||||
target_link_libraries(client_avg_pool3d_bwd PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_avg_pool3d_bwd PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
if((DTYPES MATCHES "fp8" AND DTYPES MATCHES "fp16") OR NOT DEFINED DTYPES)
|
||||
add_executable(client_splitK_gemm splitK_gemm_fp16_f8.cpp)
|
||||
target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_splitK_gemm PRIVATE composable_kernel::device_gemm_operations)
|
||||
endif()
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_grouped_gemm_fixed_nk_bias_fp16 grouped_gemm_fixed_nk_bias_fp16.cpp)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_bias_fp16 PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
add_executable(client_grouped_gemm_fixed_nk_fp16 grouped_gemm_fixed_nk_fp16.cpp)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_fp16 PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_grouped_gemm_fixed_nk_fp8 grouped_gemm_fixed_nk_fp8.cpp)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_fp8 PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
add_executable(client_grouped_gemm_fixed_nk_i8 grouped_gemm_fixed_nk_i8.cpp)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_gemm_fixed_nk_i8 PRIVATE composable_kernel::device_gemm_operations)
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
add_executable(client_image_to_column image_to_column.cpp)
|
||||
target_link_libraries(client_image_to_column PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_image_to_column PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
add_executable(client_column_to_image column_to_image.cpp)
|
||||
target_link_libraries(client_column_to_image PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_column_to_image PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_executable(client_elementwise_transpose3d elementwise_transpose_3d.cpp)
|
||||
target_link_libraries(client_elementwise_transpose3d PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_elementwise_transpose3d PRIVATE composable_kernel::device_other_operations)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp32 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_bf16 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_scaleadd_relu_int8 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_ab_fp32 grouped_conv_fwd_scaleadd_ab_fp32.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp32 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp32 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_ab_fp16 grouped_conv_fwd_scaleadd_ab_fp16.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_fp16 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_ab_bf16 grouped_conv_fwd_scaleadd_ab_bf16.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_bf16 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_bf16 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
add_executable(client_grouped_convnd_fwd_scaleadd_ab_int8 grouped_conv_fwd_scaleadd_ab_int8.cpp)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_int8 PRIVATE composable_kernel::device_operations)
|
||||
target_link_libraries(client_grouped_convnd_fwd_scaleadd_ab_int8 PRIVATE composable_kernel::device_conv_operations)
|
||||
|
||||
@@ -48,7 +48,7 @@ else()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package(composable_kernel COMPONENTS device_operations)
|
||||
find_package(composable_kernel COMPONENTS device_other_operations device_gemm_operations device_conv_operations device_contraction_operations device_reduction_operations)
|
||||
find_package(hip REQUIRED PATHS /opt/rocm)
|
||||
message(STATUS "Build with HIP ${hip_VERSION}")
|
||||
|
||||
|
||||
@@ -44,9 +44,9 @@ int run_layernorm2d_fwd_example()
|
||||
{0, 1},
|
||||
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
{1},
|
||||
1e-4,
|
||||
x_dev.GetDeviceBuffer(),
|
||||
|
||||
@@ -65,9 +65,9 @@ int run_groupnorm_fwd_example(int argc, char* argv[])
|
||||
{0, 0, 0, C, 1},
|
||||
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
{1, 2, 4}, // reduction dimension: [H, W, C]
|
||||
1e-6,
|
||||
x_dev.GetDeviceBuffer(),
|
||||
|
||||
@@ -46,9 +46,9 @@ int run_layernorm4d_fwd_example()
|
||||
{0, W * C, C, 1},
|
||||
std::vector<ck::index_t>{y.mDesc.GetStrides().begin(), y.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
std::vector<ck::index_t>{save_mean.mDesc.GetStrides().begin(),
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
save_mean.mDesc.GetStrides().end()},
|
||||
{1, 2, 3},
|
||||
1e-4,
|
||||
x_dev.GetDeviceBuffer(),
|
||||
|
||||
@@ -58,7 +58,12 @@ endfunction(add_instance_library INSTANCE_NAME)
|
||||
|
||||
|
||||
file(GLOB dir_list LIST_DIRECTORIES true *)
|
||||
set(CK_DEVICE_INSTANCES)
|
||||
set(CK_DEVICE_OTHER_INSTANCES)
|
||||
set(CK_DEVICE_GEMM_INSTANCES)
|
||||
set(CK_DEVICE_CONV_INSTANCES)
|
||||
set(CK_DEVICE_MHA_INSTANCES)
|
||||
set(CK_DEVICE_CONTRACTION_INSTANCES)
|
||||
set(CK_DEVICE_REDUCTION_INSTANCES)
|
||||
FOREACH(subdir_path ${dir_list})
|
||||
set(target_dir)
|
||||
IF(IS_DIRECTORY "${subdir_path}")
|
||||
@@ -122,7 +127,19 @@ FOREACH(subdir_path ${dir_list})
|
||||
if((add_inst EQUAL 1))
|
||||
get_filename_component(target_dir ${subdir_path} NAME)
|
||||
add_subdirectory(${target_dir})
|
||||
list(APPEND CK_DEVICE_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
if("${cmake_instance}" MATCHES "gemm")
|
||||
list(APPEND CK_DEVICE_GEMM_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
elseif("${cmake_instance}" MATCHES "conv")
|
||||
list(APPEND CK_DEVICE_CONV_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
elseif("${cmake_instance}" MATCHES "mha")
|
||||
list(APPEND CK_DEVICE_MHA_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
elseif("${cmake_instance}" MATCHES "contr")
|
||||
list(APPEND CK_DEVICE_CONTRACTION_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
elseif("${cmake_instance}" MATCHES "reduce")
|
||||
list(APPEND CK_DEVICE_REDUCTION_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
else()
|
||||
list(APPEND CK_DEVICE_OTHER_INSTANCES $<TARGET_OBJECTS:device_${target_dir}_instance>)
|
||||
endif()
|
||||
message("add_instance_directory ${subdir_path}")
|
||||
else()
|
||||
message("skip_instance_directory ${subdir_path}")
|
||||
@@ -130,50 +147,138 @@ FOREACH(subdir_path ${dir_list})
|
||||
ENDIF()
|
||||
ENDFOREACH()
|
||||
|
||||
add_library(device_operations STATIC ${CK_DEVICE_INSTANCES})
|
||||
add_library(composablekernels::device_operations ALIAS device_operations)
|
||||
|
||||
|
||||
if(CK_DEVICE_OTHER_INSTANCES)
|
||||
add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES})
|
||||
add_library(composablekernels::device_other_operations ALIAS device_other_operations)
|
||||
target_compile_features(device_other_operations PUBLIC)
|
||||
set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_other_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/utility>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_description>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/problem_transform>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/device>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/device/impl>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/grid>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/block>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/warp>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/thread>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/element>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/utility>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/quantization>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/softmax>
|
||||
)
|
||||
rocm_install(TARGETS device_other_operations
|
||||
EXPORT device_other_operationsTargets)
|
||||
rocm_install(EXPORT device_other_operationsTargets
|
||||
FILE composable_kerneldevice_other_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_GEMM_INSTANCES)
|
||||
add_library(device_gemm_operations STATIC ${CK_DEVICE_GEMM_INSTANCES})
|
||||
add_library(composablekernels::device_gemm_operations ALIAS device_gemm_operations)
|
||||
target_compile_features(device_gemm_operations PUBLIC)
|
||||
set_target_properties(device_gemm_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_gemm_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu>
|
||||
)
|
||||
rocm_install(TARGETS device_gemm_operations
|
||||
EXPORT device_gemm_operationsTargets)
|
||||
rocm_install(EXPORT device_gemm_operationsTargets
|
||||
FILE composable_kerneldevice_gemm_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_CONV_INSTANCES)
|
||||
add_library(device_conv_operations STATIC ${CK_DEVICE_CONV_INSTANCES})
|
||||
add_library(composablekernels::device_conv_operations ALIAS device_conv_operations)
|
||||
target_compile_features(device_conv_operations PUBLIC)
|
||||
set_target_properties(device_conv_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_conv_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd>
|
||||
)
|
||||
rocm_install(TARGETS device_conv_operations
|
||||
EXPORT device_conv_operationsTargets)
|
||||
rocm_install(EXPORT device_conv_operationsTargets
|
||||
FILE composable_kerneldevice_conv_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_MHA_INSTANCES)
|
||||
add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES})
|
||||
add_library(composablekernels::device_mha_operations ALIAS device_mha_operations)
|
||||
target_compile_features(device_mha_operations PUBLIC)
|
||||
set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_mha_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/mha>
|
||||
)
|
||||
rocm_install(TARGETS device_mha_operations
|
||||
EXPORT device_mha_operationsTargets)
|
||||
rocm_install(EXPORT device_mha_operationsTargets
|
||||
FILE composable_kerneldevice_mha_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_CONTRACTION_INSTANCES)
|
||||
add_library(device_contraction_operations STATIC ${CK_DEVICE_CONTRACTION_INSTANCES})
|
||||
add_library(composablekernels::device_contraction_operations ALIAS device_contraction_operations)
|
||||
target_compile_features(device_contraction_operations PUBLIC)
|
||||
set_target_properties(device_contraction_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_contraction_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/contraction>
|
||||
)
|
||||
rocm_install(TARGETS device_contraction_operations
|
||||
EXPORT device_contraction_operationsTargets)
|
||||
rocm_install(EXPORT device_contraction_operationsTargets
|
||||
FILE composable_kerneldevice_contraction_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_REDUCTION_INSTANCES)
|
||||
add_library(device_reduction_operations STATIC ${CK_DEVICE_REDUCTION_INSTANCES})
|
||||
add_library(composablekernels::device_reduction_operations ALIAS device_reduction_operations)
|
||||
target_compile_features(device_reduction_operations PUBLIC)
|
||||
set_target_properties(device_reduction_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_reduction_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/reduce>
|
||||
)
|
||||
rocm_install(TARGETS device_reduction_operations
|
||||
EXPORT device_reduction_operationsTargets)
|
||||
rocm_install(EXPORT device_reduction_operationsTargets
|
||||
FILE composable_kerneldevice_reduction_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
|
||||
add_library(device_operations INTERFACE)
|
||||
target_link_libraries(device_operations INTERFACE
|
||||
device_contraction_operations
|
||||
device_conv_operations
|
||||
device_gemm_operations
|
||||
device_other_operations
|
||||
device_reduction_operations
|
||||
utility)
|
||||
|
||||
set(DEV_OPS_INC_DIRS
|
||||
${PROJECT_SOURCE_DIR}/include/ck/
|
||||
${PROJECT_SOURCE_DIR}/library/include/ck/
|
||||
)
|
||||
|
||||
target_compile_features(device_operations PUBLIC)
|
||||
set_target_properties(device_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/utility>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_description>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/problem_transform>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/device>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/device/impl>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/grid>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/block>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/warp>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/thread>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/tensor_operation/gpu/element>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/utility>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu>
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/reduce>
|
||||
)
|
||||
|
||||
#once new arches are enabled make this an option on the main cmake file
|
||||
# and pass down here to be exported
|
||||
target_compile_options(device_operations PRIVATE
|
||||
--offload-arch=gfx908
|
||||
--offload-arch=gfx90a
|
||||
)
|
||||
|
||||
# install(TARGETS device_operations LIBRARY DESTINATION lib)
|
||||
rocm_install(TARGETS device_operations
|
||||
EXPORT device_operationsTargets)
|
||||
|
||||
rocm_install(DIRECTORY ${DEV_OPS_INC_DIRS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck)
|
||||
rocm_install(EXPORT device_operationsTargets
|
||||
FILE composable_kerneldevice_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
|
||||
|
||||
@@ -14,7 +14,6 @@ TYPED_TEST(TestTranspose, Test1)
|
||||
this->Run();
|
||||
}
|
||||
|
||||
|
||||
TYPED_TEST(TestTranpose, Test2)
|
||||
{
|
||||
std::vector<int> Ms{127, 255, 312, 799, 1573};
|
||||
@@ -27,4 +26,3 @@ TYPED_TEST(TestTranpose, Test2)
|
||||
|
||||
this->Run();
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user