diff --git a/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx b/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx new file mode 100755 index 0000000000..50693bbf1c Binary files /dev/null and b/.cache/clangd/index/add_device_operation_instance.hpp.C89DB8EBC3EFFE7D.idx differ diff --git a/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx b/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx new file mode 100755 index 0000000000..94088da4ed Binary files /dev/null and b/.cache/clangd/index/algorithm.hpp.3E7EDFC4AA4FB32B.idx differ diff --git a/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx b/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx new file mode 100755 index 0000000000..608fe43dd2 Binary files /dev/null and b/.cache/clangd/index/amd_address_space.hpp.FB92D54F0A7BAF6E.idx differ diff --git a/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx b/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx new file mode 100755 index 0000000000..c614279cad Binary files /dev/null and b/.cache/clangd/index/amd_buffer_addressing.hpp.7FB2D5AE48874EC4.idx differ diff --git a/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx b/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx new file mode 100755 index 0000000000..49910bac90 Binary files /dev/null and b/.cache/clangd/index/amd_gemm_dpp.hpp.55564336A8EE078E.idx differ diff --git a/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx b/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx new file mode 100755 index 0000000000..0515b5691d Binary files /dev/null and b/.cache/clangd/index/amd_inline_asm.hpp.7777A491B073ADB0.idx differ diff --git a/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx b/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx new file mode 100755 index 0000000000..02f003d38c Binary files /dev/null and b/.cache/clangd/index/amd_lds.hpp.3739CBB0B218918C.idx differ diff --git a/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx b/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx new file mode 100755 index 0000000000..4337c456aa Binary files /dev/null and b/.cache/clangd/index/amd_wave_read_first_lane.hpp.F8A301BEB898DCEC.idx differ diff --git a/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx b/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx new file mode 100755 index 0000000000..145a9262f3 Binary files /dev/null and b/.cache/clangd/index/amd_wmma.hpp.143B2B78EF519046.idx differ diff --git a/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx b/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx new file mode 100755 index 0000000000..44ede942d2 Binary files /dev/null and b/.cache/clangd/index/amd_xdlops.hpp.5A2FDE085CB4BC8A.idx differ diff --git a/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx b/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx new file mode 100755 index 0000000000..542e77153c Binary files /dev/null and b/.cache/clangd/index/array.hpp.EEBA77115A6888DA.idx differ diff --git a/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx b/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx new file mode 100755 index 0000000000..45c5f80942 Binary files /dev/null and b/.cache/clangd/index/avg_pool3d_bwd.hpp.40079D84C914EB9A.idx differ diff --git a/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx b/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx new file mode 100755 index 0000000000..5640c2495f Binary files /dev/null and b/.cache/clangd/index/avg_pool3d_bwd_ndhwc_instance_common.hpp.EDEFB32170FF2FE7.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx b/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx new file mode 100755 index 0000000000..11d356e4bb Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_bf16.cpp.CC3B9B35F218D95E.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx b/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx new file mode 100755 index 0000000000..11302a846b Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_common.hpp.C26898608FBBAD5A.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx b/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx new file mode 100755 index 0000000000..dc381a6072 Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_fp16.cpp.11C3DA7A01E70C82.idx differ diff --git a/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx b/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx new file mode 100755 index 0000000000..07a426bfd9 Binary files /dev/null and b/.cache/clangd/index/avgpool3d_bwd_fp32.cpp.4BE51B531517C414.idx differ diff --git a/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx b/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx new file mode 100755 index 0000000000..19d0939a1d Binary files /dev/null and b/.cache/clangd/index/batched_gemm.hpp.E9C31BFE1C18D556.idx differ diff --git a/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx b/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx new file mode 100755 index 0000000000..76c32d800f Binary files /dev/null and b/.cache/clangd/index/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp.25B35CBDBE38B07B.idx differ diff --git a/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx b/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx new file mode 100755 index 0000000000..d037c1ee38 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_add_relu_gemm_add.hpp.B9BDBBF21D219B22.idx differ diff --git a/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx b/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx new file mode 100755 index 0000000000..394f471dd9 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_bias_e_permute_xdl_fp16.cpp.6413F9674606CEBE.idx differ diff --git a/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx b/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx new file mode 100755 index 0000000000..902d5acac4 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_bias_softmax_gemm_permute.hpp.3291370BDDEB76AF.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx b/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx new file mode 100755 index 0000000000..ef6c3e6aa5 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm.hpp.3359348295C6E9CF.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx new file mode 100755 index 0000000000..69c184d54b Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_bf16.cpp.38FE56A993CA3FF4.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx new file mode 100755 index 0000000000..f323c850f6 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_fp16.cpp.F5472E9B2342F0A2.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx new file mode 100755 index 0000000000..e5ab2c6614 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_fp32.cpp.958D740430E8AB79.idx differ diff --git a/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx b/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx new file mode 100755 index 0000000000..ec7787e93f Binary files /dev/null and b/.cache/clangd/index/batched_gemm_gemm_xdl_int8.cpp.EF48A52F4EBD9A62.idx differ diff --git a/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx b/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx new file mode 100755 index 0000000000..0701564354 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.20C4D0C379DDEEBB.idx differ diff --git a/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx b/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx new file mode 100755 index 0000000000..e073f3b8fd Binary files /dev/null and b/.cache/clangd/index/batched_gemm_multi_d.hpp.4E3016E76D827F13.idx differ diff --git a/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx b/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx new file mode 100755 index 0000000000..531addd0c3 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_reduce_fp16.cpp.795EB59AA922209F.idx differ diff --git a/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx b/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx new file mode 100755 index 0000000000..c8f7589162 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_reduce_xdl_fp16.cpp.6EA78CC685DE72B1.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx new file mode 100755 index 0000000000..00857c0987 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp.3B79E782257965A3.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx new file mode 100755 index 0000000000..77ff6c2630 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.71938F78DFDFE1BE.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx new file mode 100755 index 0000000000..7d77b5be82 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp.74FE791FA1A9F5A4.idx differ diff --git a/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx new file mode 100755 index 0000000000..640850ea63 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp.B77888D4EF3A20F0.idx differ diff --git a/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx b/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx new file mode 100755 index 0000000000..4e073eccda Binary files /dev/null and b/.cache/clangd/index/batched_gemm_softmax_gemm.hpp.2D7D43E1651E0616.idx differ diff --git a/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx b/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx new file mode 100755 index 0000000000..0374afc0bb Binary files /dev/null and b/.cache/clangd/index/batched_gemm_softmax_gemm_permute.hpp.A9BA319B35F534EC.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx b/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx new file mode 100755 index 0000000000..9792e09850 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_bf16.cpp.ED68A85BB00385BD.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx b/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx new file mode 100755 index 0000000000..7a7a61505e Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_fp16.cpp.A653F9336CDDD36D.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx b/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx new file mode 100755 index 0000000000..85ac05a844 Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_fp32.cpp.B7D138F072E276A3.idx differ diff --git a/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx b/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx new file mode 100755 index 0000000000..c078cfe13a Binary files /dev/null and b/.cache/clangd/index/batched_gemm_xdl_int8.cpp.AEAA0DF5DDD6BDB1.idx differ diff --git a/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx b/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx new file mode 100755 index 0000000000..3169f340a3 Binary files /dev/null and b/.cache/clangd/index/batchnorm_backward.hpp.7322FEA177E6E308.idx differ diff --git a/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx b/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx new file mode 100755 index 0000000000..7b3e277872 Binary files /dev/null and b/.cache/clangd/index/batchnorm_backward_nhwc.cpp.7637AB6D79637171.idx differ diff --git a/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx b/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx new file mode 100755 index 0000000000..e95dacdc0d Binary files /dev/null and b/.cache/clangd/index/batchnorm_bwd_rank_4.cpp.15A9A6FE401A579D.idx differ diff --git a/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx b/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx new file mode 100755 index 0000000000..74aff7b325 Binary files /dev/null and b/.cache/clangd/index/batchnorm_common.hpp.7297BD2852C87A9D.idx differ diff --git a/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx b/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx new file mode 100755 index 0000000000..4ec87b29dc Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward.hpp.7C4BF3E98C6D2CE7.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx b/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx new file mode 100755 index 0000000000..5880cc9acd Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_inferring_nhwc.cpp.5CE047E1603B8333.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx b/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx new file mode 100755 index 0000000000..d1da7863f3 Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_training_nhwc.cpp.6918CB89E3F27E24.idx differ diff --git a/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx b/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx new file mode 100755 index 0000000000..32a4b286c2 Binary files /dev/null and b/.cache/clangd/index/batchnorm_forward_training_nhwc_obsolete.cpp.B9948F07339573BC.idx differ diff --git a/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx b/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx new file mode 100755 index 0000000000..373223b49c Binary files /dev/null and b/.cache/clangd/index/batchnorm_fwd_rank_4.cpp.051838AC0BBFCDA1.idx differ diff --git a/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx b/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx new file mode 100755 index 0000000000..37d2d3492d Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer.hpp.F7608824D3FE5589.idx differ diff --git a/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx b/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx new file mode 100755 index 0000000000..f1e9e4e8be Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer_impl.hpp.0B7228D5ED433292.idx differ diff --git a/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx b/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx new file mode 100755 index 0000000000..981c6c9695 Binary files /dev/null and b/.cache/clangd/index/batchnorm_infer_rank_4.cpp.16E993641F247D85.idx differ diff --git a/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx b/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx new file mode 100755 index 0000000000..ffabcaf6f9 Binary files /dev/null and b/.cache/clangd/index/binary_element_wise_operation.hpp.D7064E30A6CF6778.idx differ diff --git a/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx b/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx new file mode 100755 index 0000000000..cfee4b6bad Binary files /dev/null and b/.cache/clangd/index/block_to_ctile_map.hpp.F32EBB9EC094E255.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx b/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx new file mode 100755 index 0000000000..9c1bc1a5d6 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_dpp.hpp.5CB9342685AA4AD4.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx b/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx new file mode 100755 index 0000000000..10b29ea0ed Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_pipeline_xdlops.hpp.C000528561553163.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx b/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx new file mode 100755 index 0000000000..a980c11856 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_wmma.hpp.33F41B2FF8044BC6.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx b/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx new file mode 100755 index 0000000000..eab3d47158 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdl_traits.hpp.9E775B78211AACFC.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx b/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx new file mode 100755 index 0000000000..ad152908d5 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdlops.hpp.E5193AF5C1961F73.idx differ diff --git a/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx b/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx new file mode 100755 index 0000000000..7e053e3f29 Binary files /dev/null and b/.cache/clangd/index/blockwise_gemm_xdlops_skip_b_lds.hpp.BC4BEC2371CE0CF7.idx differ diff --git a/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx b/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx new file mode 100755 index 0000000000..f4dfa89289 Binary files /dev/null and b/.cache/clangd/index/blockwise_softmax.hpp.D0089700D1C9998B.idx differ diff --git a/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx b/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx new file mode 100755 index 0000000000..f7a53b2246 Binary files /dev/null and b/.cache/clangd/index/blockwise_welford.hpp.93790E23276AA4A9.idx differ diff --git a/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx b/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx new file mode 100755 index 0000000000..c07668af67 Binary files /dev/null and b/.cache/clangd/index/broadcast_add_2d_amn_bn.cpp.7B44FEB5DE986168.idx differ diff --git a/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx b/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx new file mode 100755 index 0000000000..eb1b2b8fe8 Binary files /dev/null and b/.cache/clangd/index/broadcast_add_3d_am_bmnk.cpp.D83172D8CBF829F1.idx differ diff --git a/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx b/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx new file mode 100755 index 0000000000..cec437ee3e Binary files /dev/null and b/.cache/clangd/index/c_style_pointer_cast.hpp.6866D538A80ECC76.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx b/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx new file mode 100755 index 0000000000..aa65d663ae Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_bf16.cpp.D34D118D7BC516E4.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx b/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx new file mode 100755 index 0000000000..7a1eaa350a Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_common.hpp.FBF552CB8A2753F4.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx b/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx new file mode 100755 index 0000000000..8f14e6179f Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_fp16.cpp.93DC5CFE4CF25E39.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx b/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx new file mode 100755 index 0000000000..2f716bd3db Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_fp32.cpp.D4109241584CC5F6.idx differ diff --git a/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx b/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx new file mode 100755 index 0000000000..87eb49378d Binary files /dev/null and b/.cache/clangd/index/cgemm_xdl_int8.cpp.7D0F341E5936B471.idx differ diff --git a/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx b/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx new file mode 100755 index 0000000000..8eba5263d1 Binary files /dev/null and b/.cache/clangd/index/check_err.hpp.8BCADBDE7B4A301B.idx differ diff --git a/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx b/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx new file mode 100755 index 0000000000..2fa7b44ac0 Binary files /dev/null and b/.cache/clangd/index/ck.hpp.5BA5ECB9E73F13E2.idx differ diff --git a/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx b/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx new file mode 100755 index 0000000000..e78adc52af Binary files /dev/null and b/.cache/clangd/index/cluster_descriptor.hpp.B962ADBD27415A8A.idx differ diff --git a/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx b/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx new file mode 100755 index 0000000000..aa0f8ec932 Binary files /dev/null and b/.cache/clangd/index/column_to_image_f32.cpp.B78257C7C44FE524.idx differ diff --git a/.cache/clangd/index/common.hpp.030C990214BE859F.idx b/.cache/clangd/index/common.hpp.030C990214BE859F.idx new file mode 100755 index 0000000000..d6e43323fe Binary files /dev/null and b/.cache/clangd/index/common.hpp.030C990214BE859F.idx differ diff --git a/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx b/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx new file mode 100755 index 0000000000..3125fa18e5 Binary files /dev/null and b/.cache/clangd/index/common.hpp.0CF2F8C8A70630AF.idx differ diff --git a/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx b/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx new file mode 100755 index 0000000000..ccefb22335 Binary files /dev/null and b/.cache/clangd/index/common.hpp.116380F0FD4C88BB.idx differ diff --git a/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx b/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx new file mode 100755 index 0000000000..7b586e6cdb Binary files /dev/null and b/.cache/clangd/index/common.hpp.30E7C524D1E7DCC8.idx differ diff --git a/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx b/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx new file mode 100755 index 0000000000..b10444bfb8 Binary files /dev/null and b/.cache/clangd/index/common.hpp.3324DE766DB5A26E.idx differ diff --git a/.cache/clangd/index/common.hpp.725393A22E55A928.idx b/.cache/clangd/index/common.hpp.725393A22E55A928.idx new file mode 100755 index 0000000000..de3a34a8e9 Binary files /dev/null and b/.cache/clangd/index/common.hpp.725393A22E55A928.idx differ diff --git a/.cache/clangd/index/common.hpp.7922E75661986F8F.idx b/.cache/clangd/index/common.hpp.7922E75661986F8F.idx new file mode 100755 index 0000000000..6f9762ba23 Binary files /dev/null and b/.cache/clangd/index/common.hpp.7922E75661986F8F.idx differ diff --git a/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx b/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx new file mode 100755 index 0000000000..9d274b8774 Binary files /dev/null and b/.cache/clangd/index/common.hpp.98AE17ADFF3CA289.idx differ diff --git a/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx b/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx new file mode 100755 index 0000000000..99c952d679 Binary files /dev/null and b/.cache/clangd/index/common.hpp.A2FB78C7F0E24A0E.idx differ diff --git a/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx b/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx new file mode 100755 index 0000000000..42c7798c5b Binary files /dev/null and b/.cache/clangd/index/common.hpp.AA5CB32F503E8500.idx differ diff --git a/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx b/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx new file mode 100755 index 0000000000..ace4d33f11 Binary files /dev/null and b/.cache/clangd/index/common.hpp.D8FBDC6F47AA6D80.idx differ diff --git a/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx b/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx new file mode 100755 index 0000000000..30311b4d79 Binary files /dev/null and b/.cache/clangd/index/common.hpp.E36EEFF638BE348F.idx differ diff --git a/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx b/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx new file mode 100755 index 0000000000..2df7dfc43c Binary files /dev/null and b/.cache/clangd/index/common.hpp.F52D770AC14BC38D.idx differ diff --git a/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx b/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx new file mode 100755 index 0000000000..60dfc17692 Binary files /dev/null and b/.cache/clangd/index/common.hpp.FF4590F87A18C9AE.idx differ diff --git a/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx b/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx new file mode 100755 index 0000000000..08aa5b65fc Binary files /dev/null and b/.cache/clangd/index/common_header.hpp.BCB8C81E7AAAD43F.idx differ diff --git a/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx b/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx new file mode 100755 index 0000000000..90c5ba47e8 Binary files /dev/null and b/.cache/clangd/index/common_instances.hpp.C20932BA881E0448.idx differ diff --git a/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx b/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx new file mode 100755 index 0000000000..26f2269620 Binary files /dev/null and b/.cache/clangd/index/container_element_picker.hpp.D0295372D9F2DFD0.idx differ diff --git a/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx b/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx new file mode 100755 index 0000000000..3b36d59535 Binary files /dev/null and b/.cache/clangd/index/container_helper.hpp.57BCB53456250D8A.idx differ diff --git a/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx b/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx new file mode 100755 index 0000000000..8ad808c800 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear.hpp.3FE8920B21D2C083.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx b/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx new file mode 100755 index 0000000000..9b0d0e5f37 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_bf16_compute_fp32.cpp.34FCAF706B5AEB99.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx new file mode 100755 index 0000000000..e5dc6c876f Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp16_compute_fp32.cpp.038CE2A80837D541.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx new file mode 100755 index 0000000000..e7d422e801 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32.cpp.37E0EC20BE4060B3.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx new file mode 100755 index 0000000000..cc3d4dd80d Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_bf16.cpp.4B6BEF9EB4DD9C01.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx new file mode 100755 index 0000000000..9f074231c6 Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp32_compute_fp16.cpp.F92204694821E2EC.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx new file mode 100755 index 0000000000..1d5b75057c Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp64.cpp.D740E9866ECDD95C.idx differ diff --git a/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx b/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx new file mode 100755 index 0000000000..251b6218be Binary files /dev/null and b/.cache/clangd/index/contraction_bilinear_xdl_fp64_compute_fp32.cpp.29832DEA4C5E2D0A.idx differ diff --git a/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx b/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx new file mode 100755 index 0000000000..f3d542513c Binary files /dev/null and b/.cache/clangd/index/contraction_multi_ABD_xdl_fp16.cpp.95E96326F13F98F9.idx differ diff --git a/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx b/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx new file mode 100755 index 0000000000..261b1f9244 Binary files /dev/null and b/.cache/clangd/index/contraction_scale.hpp.9A93BE54CAD13142.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx b/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx new file mode 100755 index 0000000000..316fef4806 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_bf16_compute_fp32.cpp.2E396E91009DF0FA.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx b/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx new file mode 100755 index 0000000000..8de9268ae7 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp16_compute_fp32.cpp.A7A07273739AC223.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx b/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx new file mode 100755 index 0000000000..694b3a5cf3 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32.cpp.1A2C9B556CFF860B.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx new file mode 100755 index 0000000000..29c90e89e3 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_bf16.cpp.301F203197216C44.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx new file mode 100755 index 0000000000..80d5bb0117 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp32_compute_fp16.cpp.F7341E88A32312DC.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx b/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx new file mode 100755 index 0000000000..0201b38d42 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp64.cpp.7417F385FB954651.idx differ diff --git a/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx b/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx new file mode 100755 index 0000000000..5542395726 Binary files /dev/null and b/.cache/clangd/index/contraction_scale_xdl_fp64_compute_fp32.cpp.02A2D58B76014A4B.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx new file mode 100755 index 0000000000..3ef24c5eb8 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp.D342A33945D6732C.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx new file mode 100755 index 0000000000..d3bbf74155 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp.E5EA635D0BE03F7D.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx b/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx new file mode 100755 index 0000000000..d9cfe305dd Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_perchannel_quantization_int8.cpp.DA805D9EE5DF962F.idx differ diff --git a/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx b/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx new file mode 100755 index 0000000000..002f943492 Binary files /dev/null and b/.cache/clangd/index/conv2d_fwd_xdl_perlayer_quantization_int8.cpp.7D138D1D290C5C53.idx differ diff --git a/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx b/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx new file mode 100755 index 0000000000..611e56f23c Binary files /dev/null and b/.cache/clangd/index/conv2d_quantization_common.hpp.2C2B760A7CE8525E.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx new file mode 100755 index 0000000000..cea088cc6d Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_bf16.cpp.8FB5CAA0FDE2EFE8.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx new file mode 100755 index 0000000000..234c2aa930 Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp16.cpp.A862E22D755445CE.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx new file mode 100755 index 0000000000..b8061eca8d Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_fp32.cpp.E8494EFCFB4F3B30.idx differ diff --git a/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx new file mode 100755 index 0000000000..5fe4201856 Binary files /dev/null and b/.cache/clangd/index/conv_fwd_xdl_scaleadd_ab_int8.cpp.97C188D55BA3C9C1.idx differ diff --git a/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx b/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx new file mode 100755 index 0000000000..21c4fd727b Binary files /dev/null and b/.cache/clangd/index/conv_tensor_rearrange.hpp.1BD4F049133037BC.idx differ diff --git a/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx b/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx new file mode 100755 index 0000000000..769092f8be Binary files /dev/null and b/.cache/clangd/index/conv_tensor_rearrange_op.hpp.1DE16BC6AE42EE5B.idx differ diff --git a/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx b/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx new file mode 100755 index 0000000000..aac25c6623 Binary files /dev/null and b/.cache/clangd/index/conv_util.cpp.2DB2D9F346007F63.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx b/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx new file mode 100755 index 0000000000..f8664073ed Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data.cpp.7C4E178AEE50DCF2.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx b/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx new file mode 100755 index 0000000000..d712d61aa8 Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_common.hpp.FAAF76603FF7D70B.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx b/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx new file mode 100755 index 0000000000..362e15c797 Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp.17D6FDBA1D601B9A.idx differ diff --git a/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx b/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx new file mode 100755 index 0000000000..1dcb1319b1 Binary files /dev/null and b/.cache/clangd/index/convnd_bwd_data_xdl_fp16.cpp.32F247EF8BEF9A27.idx differ diff --git a/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx b/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx new file mode 100755 index 0000000000..9351ce43d9 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd.cpp.63437EF3B0E7154B.idx differ diff --git a/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx b/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx new file mode 100755 index 0000000000..2ad3b26124 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_activ_multi_ab_common.hpp.F16D50D450CBF542.idx differ diff --git a/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx b/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx new file mode 100755 index 0000000000..3274c2601c Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_activ_unary_common.hpp.A3C6C6A8E8602CD2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx b/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx new file mode 100755 index 0000000000..9dd712f57a Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_common.hpp.1735C0647D85C158.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx b/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx new file mode 100755 index 0000000000..9f7e7ec210 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_bf16.cpp.32DD457450CC173D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx b/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx new file mode 100755 index 0000000000..983ca13427 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_fp16.cpp.57DCB3A8914F4BCB.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx b/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx new file mode 100755 index 0000000000..be88175ed3 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_fp32.cpp.7C22839582612580.idx differ diff --git a/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx b/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx new file mode 100755 index 0000000000..4f6fa22878 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_max_xdl_int8.cpp.E84F6E18A0DCB8A3.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx b/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx new file mode 100755 index 0000000000..338128dad4 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_abs_fp16.cpp.B14E3C722763D4B8.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx b/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx new file mode 100755 index 0000000000..26863ef07b Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_bf16.cpp.054A24EF5BFF4A1F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx b/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx new file mode 100755 index 0000000000..f0d49e1faf Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_bilinear_residual_fp16.cpp.C4A66C697A8DEBDA.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx b/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx new file mode 100755 index 0000000000..bbb1713a15 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_clippedrelu_fp16.cpp.4759FB8DCFC58E7D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx b/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx new file mode 100755 index 0000000000..ec61376da5 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_elu_fp16.cpp.922AD9048F6922A8.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx b/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx new file mode 100755 index 0000000000..4986ae43f4 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp16.cpp.E044D0CD44CCFE7F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx b/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx new file mode 100755 index 0000000000..78aa926495 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp32.cpp.911AF34195327474.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx b/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx new file mode 100755 index 0000000000..97c2ae2bcb Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_fp64.cpp.3D0123C922DFAFA2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx b/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx new file mode 100755 index 0000000000..6ca16ad7bb Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_int8.cpp.1BF1AFB682499568.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx b/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx new file mode 100755 index 0000000000..9f7c2c4e48 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_leakyrelu_fp16.cpp.BA4E6713D7C74F09.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx b/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx new file mode 100755 index 0000000000..bd633b7c76 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_pow_fp16.cpp.0F662794CF8A0DB2.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx b/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx new file mode 100755 index 0000000000..5abe29475b Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_relu_fp16.cpp.8F7B959CCFC1E66F.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx new file mode 100755 index 0000000000..0d50987879 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp.54C5613FFA1E2460.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx new file mode 100755 index 0000000000..c21fab0457 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp.303711D8C093AB3A.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx b/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx new file mode 100755 index 0000000000..92cf32bb29 Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_sigmoid_fp16.cpp.7462E7274E7DC93D.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx b/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx new file mode 100755 index 0000000000..dc4161aaed Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_softrelu_fp16.cpp.47B66AF94BC3AD6A.idx differ diff --git a/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx b/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx new file mode 100755 index 0000000000..ab8d38683f Binary files /dev/null and b/.cache/clangd/index/convnd_fwd_xdl_tanh_fp16.cpp.A1CCA35C668C7A32.idx differ diff --git a/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx b/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx new file mode 100755 index 0000000000..c5dbc82176 Binary files /dev/null and b/.cache/clangd/index/convolution_backward_data.hpp.F488158732BCE944.idx differ diff --git a/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx b/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx new file mode 100755 index 0000000000..6b8e8c6271 Binary files /dev/null and b/.cache/clangd/index/convolution_backward_data_specialization.hpp.E2F7DA4511290758.idx differ diff --git a/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx b/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx new file mode 100755 index 0000000000..eca7cb0f26 Binary files /dev/null and b/.cache/clangd/index/convolution_backward_weight_specialization.hpp.D1A61821B9657E4F.idx differ diff --git a/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx b/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx new file mode 100755 index 0000000000..0882c97343 Binary files /dev/null and b/.cache/clangd/index/convolution_forward.hpp.8E3EE29C519B7A7E.idx differ diff --git a/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx b/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx new file mode 100755 index 0000000000..f2c450731f Binary files /dev/null and b/.cache/clangd/index/convolution_forward_specialization.hpp.29B75FDB4D9A7ED7.idx differ diff --git a/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx b/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx new file mode 100755 index 0000000000..0bd93c1507 Binary files /dev/null and b/.cache/clangd/index/convolution_host_tensor_descriptor_helper.hpp.C7E0FAE3D228F16B.idx differ diff --git a/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx b/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx new file mode 100755 index 0000000000..386438875c Binary files /dev/null and b/.cache/clangd/index/convolution_parameter.cpp.AB24172BD3A32784.idx differ diff --git a/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx b/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx new file mode 100755 index 0000000000..ae76306977 Binary files /dev/null and b/.cache/clangd/index/convolution_parameter.hpp.98A4E91AB8FFEC38.idx differ diff --git a/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx b/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx new file mode 100755 index 0000000000..2101734dc2 Binary files /dev/null and b/.cache/clangd/index/copy.hpp.D47879DBFF83B386.idx differ diff --git a/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx b/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx new file mode 100755 index 0000000000..dd885df2c0 Binary files /dev/null and b/.cache/clangd/index/data_type.hpp.FA421D61CFEE8D86.idx differ diff --git a/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx b/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx new file mode 100755 index 0000000000..2592dc4756 Binary files /dev/null and b/.cache/clangd/index/data_type_enum.hpp.9C984CB93A76A458.idx differ diff --git a/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx b/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx new file mode 100755 index 0000000000..9c074be25a Binary files /dev/null and b/.cache/clangd/index/debug.hpp.3D8E2A10AC86C578.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx new file mode 100755 index 0000000000..8fcd998f41 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp.8DD0CEE151EEE78C.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx new file mode 100755 index 0000000000..c0cba2ba8a Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp.E45DD97792659A1B.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx new file mode 100755 index 0000000000..8c27027b2d Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp.0A50ACDD7655DC8C.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx new file mode 100755 index 0000000000..114a030838 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp.C65ED37BE0626399.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx new file mode 100755 index 0000000000..8f3ecd0d73 Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp.322ABBBE306F5168.idx differ diff --git a/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx new file mode 100755 index 0000000000..6b6704811c Binary files /dev/null and b/.cache/clangd/index/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp.BE9138723EE82CA7.idx differ diff --git a/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx b/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx new file mode 100755 index 0000000000..a94d1d3e3f Binary files /dev/null and b/.cache/clangd/index/device_avgpool3d_bwd_ndhwc_ndhwc.hpp.B8CFD83A7F04917D.idx differ diff --git a/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx b/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx new file mode 100755 index 0000000000..5732d6b8ea Binary files /dev/null and b/.cache/clangd/index/device_avgpool_bwd.hpp.65CD0007480F503D.idx differ diff --git a/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx b/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx new file mode 100755 index 0000000000..4bba0f4cd9 Binary files /dev/null and b/.cache/clangd/index/device_base.hpp.07667515082DA1A8.idx differ diff --git a/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx b/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx new file mode 100755 index 0000000000..31dcb6393c Binary files /dev/null and b/.cache/clangd/index/device_batched_contraction_multiple_d.hpp.C066C76CB17254D1.idx differ diff --git a/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx b/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx new file mode 100755 index 0000000000..543307e8fa Binary files /dev/null and b/.cache/clangd/index/device_batched_contraction_multiple_d_xdl_cshuffle.hpp.9FB9459677B68FA8.idx differ diff --git a/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx b/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx new file mode 100755 index 0000000000..742f35e896 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm.hpp.0D6DC4AA71BF6793.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx new file mode 100755 index 0000000000..a15625dc53 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.2CCE15A52A6097D9.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx new file mode 100755 index 0000000000..264f593220 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.09BE891BDB3DB7E7.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx b/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx new file mode 100755 index 0000000000..622ee5e30f Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp.FE05EE1022E21C41.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx new file mode 100755 index 0000000000..86617bc719 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.5255F78DB2565F3C.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx new file mode 100755 index 0000000000..a0adfc213a Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.B4883A86B075066E.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx b/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx new file mode 100755 index 0000000000..c48179452a Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm.hpp.FDFBA95BB6D583D3.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx new file mode 100755 index 0000000000..e2dd29da73 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle.hpp.8AECE81C6A31D6CE.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx new file mode 100755 index 0000000000..bf1a549536 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.24C81AB80A523E1D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx new file mode 100755 index 0000000000..16d2c7f352 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp.42B3A408DD25BE6F.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx b/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx new file mode 100755 index 0000000000..81cd5060a5 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multi_d.hpp.65CBFD3787EC8604.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx b/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx new file mode 100755 index 0000000000..cf9895f783 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multi_d_xdl.hpp.5F31589668DB0CDD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx new file mode 100755 index 0000000000..35fe9c96b6 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d.hpp.1EC37BB5D09F3746.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx new file mode 100755 index 0000000000..5163a6be3f Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp.1D083EFB833A8BDD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx new file mode 100755 index 0000000000..4420018cd8 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle.hpp.008517D2C19930C3.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx new file mode 100755 index 0000000000..c279fc209c Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp.3CE3CBFFC33E63B5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx new file mode 100755 index 0000000000..9ee5aec10f Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp.C2D6CBC7BDD03D15.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx new file mode 100755 index 0000000000..0b8a348b57 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp.BE70F5327F5BF3ED.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx new file mode 100755 index 0000000000..3a3f796fda Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp.542AEDD871493BF1.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx new file mode 100755 index 0000000000..02b70043f1 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm.hpp.2CF55436E05C96D5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx new file mode 100755 index 0000000000..2e0801fdac Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute.hpp.0F50C2899670001D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx new file mode 100755 index 0000000000..06b1588d7d Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.72B4F2954B40EC19.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx new file mode 100755 index 0000000000..b227747098 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp.CA655EDB759D211C.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx new file mode 100755 index 0000000000..be8b2c5569 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.54BCAA9E5C4C7CA0.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx new file mode 100755 index 0000000000..9eb0989e92 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp.5F249F65D9D49000.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx new file mode 100755 index 0000000000..dd80163482 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp.9E1D81891D4D775B.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx b/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx new file mode 100755 index 0000000000..b4ddce7eb6 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl.hpp.336993554588B91D.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx new file mode 100755 index 0000000000..5399de0cb8 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp.01B3A3CE0FB07AA4.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx new file mode 100755 index 0000000000..eab90d28fc Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp.6732FB26500DDC13.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx new file mode 100755 index 0000000000..e43f5a3ec0 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp.5C50F8886E987210.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx new file mode 100755 index 0000000000..e003ae8b04 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp.991CBBA160B6AE8A.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx new file mode 100755 index 0000000000..6c8c17ba4e Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp.909733D52FF49021.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx new file mode 100755 index 0000000000..ff38a02f77 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp.EF1B1B5023E691DD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx new file mode 100755 index 0000000000..28f819c0fa Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp.6191F299E76FA25A.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx new file mode 100755 index 0000000000..0141c037b8 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp.1378958A5E1CC674.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx new file mode 100755 index 0000000000..d8138e15dd Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp.155CED9F59B77387.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx new file mode 100755 index 0000000000..28ab0b9f44 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp.021E4B84540D9AAD.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx new file mode 100755 index 0000000000..cd5c1f4caf Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp.2D0A22E72C5CD4C0.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx new file mode 100755 index 0000000000..3f50ada987 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp.6662845BB6111F27.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx new file mode 100755 index 0000000000..35c931eb62 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp.3B9F48B295268F74.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx new file mode 100755 index 0000000000..446385c8b7 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp.0C31B57B423E19C5.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx new file mode 100755 index 0000000000..fb5dbff092 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp.8439FC102CF881D2.idx differ diff --git a/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx new file mode 100755 index 0000000000..34c8066366 Binary files /dev/null and b/.cache/clangd/index/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp.9E2B022D116E5337.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx b/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx new file mode 100755 index 0000000000..deb2d76cdf Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward.hpp.43E337CEA4624B30.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx b/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx new file mode 100755 index 0000000000..a5a01a45fb Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_bf16_instance.cpp.CEBDED0AD1292F09.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx b/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx new file mode 100755 index 0000000000..fd4300cc77 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f16_instance.cpp.D1D5284FB6757AF3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx b/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx new file mode 100755 index 0000000000..f574f4832e Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f32_instance.cpp.CA2117C76C8B866B.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx b/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx new file mode 100755 index 0000000000..dadb38ffcb Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_f64_instance.cpp.B99A50DCD60DF309.idx differ diff --git a/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx b/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx new file mode 100755 index 0000000000..7caed0647c Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_backward_impl.hpp.8B916A92DFA8139E.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx b/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx new file mode 100755 index 0000000000..2141d90fc1 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward.hpp.0E1593921379F2F3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx b/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx new file mode 100755 index 0000000000..f7789255f3 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_bf16_instance.cpp.F51389A9E014ED43.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx b/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx new file mode 100755 index 0000000000..f191e56fa2 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f16_instance.cpp.5B31F4805B2AC1BA.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx b/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx new file mode 100755 index 0000000000..52a0fe9847 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f32_instance.cpp.DAA81C77B8C2B284.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx b/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx new file mode 100755 index 0000000000..b45020cf82 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_f64_instance.cpp.B473866A874EE221.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx b/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx new file mode 100755 index 0000000000..6491ff3a21 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_impl.hpp.6D2CA9CBDDC84AC3.idx differ diff --git a/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx b/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx new file mode 100755 index 0000000000..3c5031a79e Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_forward_impl_obsolete.hpp.7A0467126AEB6502.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx b/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx new file mode 100755 index 0000000000..dfc042df11 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer.hpp.4EDE573CB1DF1297.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx b/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx new file mode 100755 index 0000000000..a15dc51107 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_bf16_instance.cpp.FEC3D08EDEA4CE6B.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx b/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx new file mode 100755 index 0000000000..8eb86213b1 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f16_instance.cpp.B01DECCDEE63A78A.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx b/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx new file mode 100755 index 0000000000..ef2737ba86 Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f32_instance.cpp.889B58511EB200D7.idx differ diff --git a/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx b/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx new file mode 100755 index 0000000000..400f2f459d Binary files /dev/null and b/.cache/clangd/index/device_batchnorm_infer_f64_instance.cpp.0266490535848A23.idx differ diff --git a/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx b/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx new file mode 100755 index 0000000000..2464a976a0 Binary files /dev/null and b/.cache/clangd/index/device_cgemm.hpp.CD6CA3F05D9AAF4D.idx differ diff --git a/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx b/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx new file mode 100755 index 0000000000..f72ee91a34 Binary files /dev/null and b/.cache/clangd/index/device_cgemm_4gemm_xdl_cshuffle.hpp.CB4931718165FBCB.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx b/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx new file mode 100755 index 0000000000..9acab2618a Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gndhwc_3d_instance.cpp.8AF56F1E7AD14DC6.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx b/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx new file mode 100755 index 0000000000..4d6db89ae9 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gnhwc_2d_instance.cpp.8EBF74F132E896AB.idx differ diff --git a/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx b/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx new file mode 100755 index 0000000000..cdba290970 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_gnwc_1d_instance.cpp.C6C85A0C333E012A.idx differ diff --git a/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx b/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx new file mode 100755 index 0000000000..940df9a9af Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_impl.hpp.545E1241F8BEB14B.idx differ diff --git a/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx b/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx new file mode 100755 index 0000000000..ae0eb0a2aa Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_instance.hpp.FA1AC606D0CD0897.idx differ diff --git a/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx b/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx new file mode 100755 index 0000000000..2651b4fc8f Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_ndhwgc_3d_instance.cpp.CBDECCEC6F806814.idx differ diff --git a/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx b/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx new file mode 100755 index 0000000000..94dad8f64f Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_nhwgc_2d_instance.cpp.9A3328B90E30CC03.idx differ diff --git a/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx b/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx new file mode 100755 index 0000000000..a90d1c9068 Binary files /dev/null and b/.cache/clangd/index/device_column_to_image_nwgc_1d_instance.cpp.163F7E8C2077B0E0.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx new file mode 100755 index 0000000000..b75711cca4 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp.79A78F49E52676FD.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx new file mode 100755 index 0000000000..ac82ae08bb Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp.DB04A314807BEF1C.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx new file mode 100755 index 0000000000..c2aa82d390 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp.E624632F1E715961.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx new file mode 100755 index 0000000000..f266e053ca Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp.CACB611E1CE9F1B5.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx new file mode 100755 index 0000000000..c09d48887d Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp.9E3DEE644D650E5A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx new file mode 100755 index 0000000000..f6f9f8744f Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp.384AC715F1A68557.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx new file mode 100755 index 0000000000..d83bd12f79 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp.66D5EE934903111A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx new file mode 100755 index 0000000000..c83594015d Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp.8194F48DCC1F513B.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx new file mode 100755 index 0000000000..3e8f17b950 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp.ED87AF8FD1B46167.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx new file mode 100755 index 0000000000..c53eaf299b Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp.9B28805BAED0F92D.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx new file mode 100755 index 0000000000..24f610b991 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp.7E6A2A9800DD0E7A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx new file mode 100755 index 0000000000..594327118c Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp.E0C6A616E43DC9DD.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx new file mode 100755 index 0000000000..8fc2ae409e Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp.01988F25803E8456.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx new file mode 100755 index 0000000000..5841a9e425 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp.AFEB4718FB33138B.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx new file mode 100755 index 0000000000..6749483006 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp.D913F33B640C756C.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx new file mode 100755 index 0000000000..387a7c6ded Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp.EF41093E7954CFCE.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx new file mode 100755 index 0000000000..acb6a31ef3 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp.CB877B16EA5C28D0.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx new file mode 100755 index 0000000000..091633c603 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp.7AE9E872C7D0A549.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx new file mode 100755 index 0000000000..3f37c7ad45 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp.E366D05CBA9CA229.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx new file mode 100755 index 0000000000..bf87b5d909 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp.F88287BB0527CE6D.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx new file mode 100755 index 0000000000..1a849d1252 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp.03F037ABB1A17197.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx new file mode 100755 index 0000000000..3f92a5e304 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp.237A2B88B920F202.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx new file mode 100755 index 0000000000..1a2c7da533 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp.177CE21E4E683B24.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx new file mode 100755 index 0000000000..8fa891ddf4 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp.FE8F7C3907743A26.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx new file mode 100755 index 0000000000..21d5975b5b Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp.1FB1A94EA79EE13A.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx new file mode 100755 index 0000000000..a14b620c85 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp.6BFB81AFEEB1C1B7.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx new file mode 100755 index 0000000000..07640e52fe Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp.D3FA995D6DF314A6.idx differ diff --git a/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx new file mode 100755 index 0000000000..2e806c0d53 Binary files /dev/null and b/.cache/clangd/index/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp.D00F48772AF74F17.idx differ diff --git a/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx b/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx new file mode 100755 index 0000000000..3fd0c85c74 Binary files /dev/null and b/.cache/clangd/index/device_contraction_instance.hpp.15AB9D8608FA40A0.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx b/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx new file mode 100755 index 0000000000..f7d14160a2 Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_abd.hpp.D0EC339053CFF7D0.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx b/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx new file mode 100755 index 0000000000..b91b2180af Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_abd_xdl_cshuffle.hpp.9AA6B0ABD9EE929D.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx b/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx new file mode 100755 index 0000000000..2682298ab8 Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_d.hpp.CA3D08C20FA799BC.idx differ diff --git a/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx b/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx new file mode 100755 index 0000000000..ac91121f3a Binary files /dev/null and b/.cache/clangd/index/device_contraction_multiple_d_xdl_cshuffle.hpp.18531F18DA84F612.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx new file mode 100755 index 0000000000..c01e8c3066 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp.634AB0569FA8301A.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx new file mode 100755 index 0000000000..02f0d483b1 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp.1229C32BCA5E8056.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx new file mode 100755 index 0000000000..9d809e778f Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp.B21239C1EC74587F.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx new file mode 100755 index 0000000000..176cf4a6ba Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp.92168E842E069568.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx new file mode 100755 index 0000000000..6770016014 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp.8EFB8D272907AC49.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx new file mode 100755 index 0000000000..a8853f85e1 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp.4BAF3FDF22005D6C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx new file mode 100755 index 0000000000..9742712ada Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp.F76E34EF68D8A65B.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx new file mode 100755 index 0000000000..adbdc99880 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp.1704AA4E0EFAB142.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx new file mode 100755 index 0000000000..4ce0e05447 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp.29A1243F9B97E124.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx new file mode 100755 index 0000000000..8747bfe8e9 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp.020B0D68AE892602.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx new file mode 100755 index 0000000000..d219e8aff4 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp.A426959DB1641563.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx new file mode 100755 index 0000000000..2948f37983 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp.1B1383BC1860443B.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx new file mode 100755 index 0000000000..7e2b545b60 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp.C0ABEB6CD7C15E09.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx new file mode 100755 index 0000000000..b1f67dcb1a Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp.76E857B0424F98D1.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx new file mode 100755 index 0000000000..3a9650f2cb Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp.2C4EF55D06D154CA.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx new file mode 100755 index 0000000000..7082d48740 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp.D9C825C0FBC7162C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx new file mode 100755 index 0000000000..9d5a95265d Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp.9ED7E55B00DD6F70.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx new file mode 100755 index 0000000000..f001fd5bcb Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp.87288671AE2510FD.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx new file mode 100755 index 0000000000..b193fa0d52 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp.AA8697A6E53E704C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx new file mode 100755 index 0000000000..d3e8469792 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp.FC0AF91C98199AEA.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx new file mode 100755 index 0000000000..c12d142b72 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp.265462C1E86DC0F1.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx new file mode 100755 index 0000000000..51cf0aed40 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp.32EBBC25AA6C0CF0.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx new file mode 100755 index 0000000000..3da36c72d9 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp.5C1840A2D370420C.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx new file mode 100755 index 0000000000..f5dcda37c7 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp.05BFD4E4455B0BDF.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx new file mode 100755 index 0000000000..3a0b9996ef Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp.BFA2076F1038BB65.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx new file mode 100755 index 0000000000..8732f9fe6c Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp.16DF4E204049F20E.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx new file mode 100755 index 0000000000..01b0bfc2e8 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp.25AD39BA7B2B9C77.idx differ diff --git a/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx new file mode 100755 index 0000000000..0473569119 Binary files /dev/null and b/.cache/clangd/index/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp.F46067E1C6499190.idx differ diff --git a/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx b/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx new file mode 100755 index 0000000000..c2cbd1e1a1 Binary files /dev/null and b/.cache/clangd/index/device_contraction_utils.hpp.39AAD30D5F9C497C.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx new file mode 100755 index 0000000000..b1073d7b65 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp.F95715A59DC357E4.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx new file mode 100755 index 0000000000..68cf27aaa1 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp.11B1653CC880738D.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx new file mode 100755 index 0000000000..e07a6300a7 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp.1FF1E6FFC39B05F6.idx differ diff --git a/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx new file mode 100755 index 0000000000..66a34ea033 Binary files /dev/null and b/.cache/clangd/index/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp.27AC774D5173DB46.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx new file mode 100755 index 0000000000..c6b6e83788 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp.5A99B55004C02041.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx new file mode 100755 index 0000000000..51cc782372 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.38978BB017721061.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx new file mode 100755 index 0000000000..c4c25ff488 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.015682E6D487BC89.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx new file mode 100755 index 0000000000..3e2d48cf35 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.24521662C376153B.idx differ diff --git a/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx new file mode 100755 index 0000000000..e2fe3f761f Binary files /dev/null and b/.cache/clangd/index/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.050ACEE75FFD374F.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx new file mode 100755 index 0000000000..d85338eaac Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp.095F638AA48F9495.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx new file mode 100755 index 0000000000..1e6eaa666d Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp.03A5D53DEE4E2511.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx new file mode 100755 index 0000000000..425f948f5a Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp.FF7059ED53F4F0F2.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx new file mode 100755 index 0000000000..f8183a1fe7 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp.2C9D98DCC57DC34E.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx new file mode 100755 index 0000000000..91c205427a Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp.66D8D8BBD73054E8.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx new file mode 100755 index 0000000000..d5ab954aaf Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp.3FB08C96D8C7394B.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx new file mode 100755 index 0000000000..899c5aff77 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp.FAAE94F7CEBB58BA.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx new file mode 100755 index 0000000000..2ab0852fc7 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp.9102AFA84B67CE1B.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx new file mode 100755 index 0000000000..859313a10d Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp.656EAF4E8A7F404D.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx new file mode 100755 index 0000000000..53885d0cde Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp.793B97DB82C3A02D.idx differ diff --git a/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx new file mode 100755 index 0000000000..43cbd00430 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp.A7E63495E5270E3E.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx b/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx new file mode 100755 index 0000000000..e9ac8a547f Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp.486913CE7740FA6B.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx b/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx new file mode 100755 index 0000000000..2fbe2fad45 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp.8577D499B9483812.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx b/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx new file mode 100755 index 0000000000..750891291d Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_int8_instance.hpp.78D2B94EA941EB43.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx b/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx new file mode 100755 index 0000000000..fe6372ca63 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp.C31E228888CDC52C.idx differ diff --git a/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx b/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx new file mode 100755 index 0000000000..1fdd9321f2 Binary files /dev/null and b/.cache/clangd/index/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp.108EC38462E7A7B4.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx new file mode 100755 index 0000000000..e8f0dd3b11 Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp.7487AFBFF492DC5A.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx new file mode 100755 index 0000000000..ef4799b3cb Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp.B17F2553F57B25A6.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx new file mode 100755 index 0000000000..6cd9679430 Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp.D3A7F488899285AF.idx differ diff --git a/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx new file mode 100755 index 0000000000..a046bf1756 Binary files /dev/null and b/.cache/clangd/index/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp.31CAAB24531BB9D5.idx differ diff --git a/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx b/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx new file mode 100755 index 0000000000..908388d126 Binary files /dev/null and b/.cache/clangd/index/device_conv_bwd_data.hpp.81B90E235167286C.idx differ diff --git a/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx b/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx new file mode 100755 index 0000000000..dbd43ba67a Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd.hpp.257583CFB6A8DC72.idx differ diff --git a/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx b/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx new file mode 100755 index 0000000000..21b3eb1024 Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd_bias_activation.hpp.D4F53CD24AD4278A.idx differ diff --git a/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx b/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx new file mode 100755 index 0000000000..a7f973a12c Binary files /dev/null and b/.cache/clangd/index/device_conv_fwd_bias_activation_add.hpp.D947A059FE31F7BB.idx differ diff --git a/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx b/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx new file mode 100755 index 0000000000..02e20a697c Binary files /dev/null and b/.cache/clangd/index/device_conv_tensor_rearrange.hpp.90268624B8539288.idx differ diff --git a/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx b/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx new file mode 100755 index 0000000000..a086a3c36b Binary files /dev/null and b/.cache/clangd/index/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp.A70DE3BD61066477.idx differ diff --git a/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx b/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx new file mode 100755 index 0000000000..7fcb924eaf Binary files /dev/null and b/.cache/clangd/index/device_elementwise.hpp.00198FF49AAFC017.idx differ diff --git a/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx b/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx new file mode 100755 index 0000000000..8746f85346 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_2d_impl.hpp.30FD009D4EA9FEE5.idx differ diff --git a/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx b/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx new file mode 100755 index 0000000000..f0659dbd90 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_3d_impl.hpp.EA5D0D82E2445BC8.idx differ diff --git a/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx b/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx new file mode 100755 index 0000000000..b152cb6b1e Binary files /dev/null and b/.cache/clangd/index/device_elementwise_impl.hpp.54205731D4AB5FA8.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx b/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx new file mode 100755 index 0000000000..98110b189d Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization.hpp.2F6431A98ECDBEBB.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx b/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx new file mode 100755 index 0000000000..a761d4c1dd Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization_f16_instance.cpp.B8CDDDF357D7D999.idx differ diff --git a/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx b/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx new file mode 100755 index 0000000000..c9bc573c0b Binary files /dev/null and b/.cache/clangd/index/device_elementwise_normalization_impl.hpp.D9648470B11E62DC.idx differ diff --git a/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx b/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx new file mode 100755 index 0000000000..fbd142ba76 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_scale.hpp.A8B4C8DA3A99ADD2.idx differ diff --git a/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx b/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx new file mode 100755 index 0000000000..e13d1a7626 Binary files /dev/null and b/.cache/clangd/index/device_elementwise_scale_impl.hpp.792CB7A86911D632.idx differ diff --git a/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx b/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx new file mode 100755 index 0000000000..c28118b711 Binary files /dev/null and b/.cache/clangd/index/device_gemm.hpp.C11C88B2080F572F.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx new file mode 100755 index 0000000000..c83434fafd Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.236524D86BD9A065.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx new file mode 100755 index 0000000000..223bcbe35d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.3F5C595E84E47B20.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx new file mode 100755 index 0000000000..0ad5910275 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.5836A0C2B3A14787.idx differ diff --git a/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx new file mode 100755 index 0000000000..3d16fde17d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.E0256C2451B1EE93.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx new file mode 100755 index 0000000000..8b024a024d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.EED2788EEB0B75F1.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx new file mode 100755 index 0000000000..eae72dcfa4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.F71CF786157F3658.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx new file mode 100755 index 0000000000..633ba972cd Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.777F64009D887A71.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx new file mode 100755 index 0000000000..e66f128f60 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.9B8C2015B4635A97.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx new file mode 100755 index 0000000000..43e827f788 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.7475A9472170E32A.idx differ diff --git a/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx new file mode 100755 index 0000000000..7bf0a8cef4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5CFCD880070835B7.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx new file mode 100755 index 0000000000..e68440a9f9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp.15161CADD4CF1397.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx new file mode 100755 index 0000000000..078d3d3f18 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp.C192E44A2A2B2438.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx new file mode 100755 index 0000000000..fd398e7c10 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.0E4AE68E13A6F5B4.idx differ diff --git a/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx new file mode 100755 index 0000000000..a10692619d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.3B87280201C19511.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx new file mode 100755 index 0000000000..15dea6fb96 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp.E948F8CEDD1EC4BF.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx new file mode 100755 index 0000000000..aa062a145d Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp.0BA2333B2B00FEDC.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx new file mode 100755 index 0000000000..31e20d1019 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp.F5D89038BFC386C8.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx new file mode 100755 index 0000000000..78a86e8962 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp.935CC5A9EFAF5617.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx new file mode 100755 index 0000000000..86f594f8fa Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.81A12F337280E714.idx differ diff --git a/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx new file mode 100755 index 0000000000..41883f77db Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.6BB8AF1DE23C88B0.idx differ diff --git a/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx new file mode 100755 index 0000000000..3e88ec4e8e Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.6559BA68564A041C.idx differ diff --git a/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx new file mode 100755 index 0000000000..c79df4efaa Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.5F5C907654F27409.idx differ diff --git a/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx new file mode 100755 index 0000000000..766c156be9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp.8D291954B92003AF.idx differ diff --git a/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx new file mode 100755 index 0000000000..88403271dc Binary files /dev/null and b/.cache/clangd/index/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp.9FCCE6CADB35B88C.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx new file mode 100755 index 0000000000..d0bf400cb1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.479D1366D0F3CE30.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx new file mode 100755 index 0000000000..68a19236c4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.D6BBA9767E8DA592.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx new file mode 100755 index 0000000000..b1efd0bd05 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.F658EA6412E57274.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx new file mode 100755 index 0000000000..d33170e119 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.D219505C6A3BDF13.idx differ diff --git a/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx b/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx new file mode 100755 index 0000000000..2070d7f542 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bias_add_reduce_xdl_cshuffle.hpp.2AAE1676CF63D1B6.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx new file mode 100755 index 0000000000..ba1af861e3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp.4E5648F8B76EED28.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx new file mode 100755 index 0000000000..bf1f37f916 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp.51C1492BB7273708.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx new file mode 100755 index 0000000000..87e5bdee01 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp.89C496A745057432.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx new file mode 100755 index 0000000000..fd5d15c058 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp.DA4EFE2E10A56944.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx new file mode 100755 index 0000000000..362f06a72f Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp.80BE80F117BA699A.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx new file mode 100755 index 0000000000..d7f1457738 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp.E76894F842B8152E.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx new file mode 100755 index 0000000000..3d85705d8b Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp.741D3DF3032F48B6.idx differ diff --git a/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx new file mode 100755 index 0000000000..848d751cc4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp.E3C0C7076B1A5420.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx b/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx new file mode 100755 index 0000000000..7438ad4a5f Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp.hpp.86A83F148A978F3B.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx new file mode 100755 index 0000000000..ac690f6bfb Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp.AC3765D99685AA83.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx new file mode 100755 index 0000000000..1e2c6fcd36 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp.BE2169842E629D5F.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx new file mode 100755 index 0000000000..68c67561f6 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp.C525CCAE82B88202.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx new file mode 100755 index 0000000000..9ee5d5b508 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp.E70AE989E444006E.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx new file mode 100755 index 0000000000..938419e3cc Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp.F48BB29B1F2C3D07.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx new file mode 100755 index 0000000000..f513997e26 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.9E7DF32B7E3B06F9.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx new file mode 100755 index 0000000000..23c19d8bc9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp.8E876A3BCC68461F.idx differ diff --git a/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx new file mode 100755 index 0000000000..f6f4efbf17 Binary files /dev/null and b/.cache/clangd/index/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F358D66C9DE4EC29.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx new file mode 100755 index 0000000000..6fe332601f Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.B5661BE447C6D950.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx new file mode 100755 index 0000000000..3bb46e607b Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.E716A8264A34B5D8.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx new file mode 100755 index 0000000000..2cb7364112 Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.B8E2B7EF254C9C8D.idx differ diff --git a/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx new file mode 100755 index 0000000000..fdd230bbd1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.4104F408858A9B41.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx b/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx new file mode 100755 index 0000000000..9979312f95 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_abd.hpp.01CABF5AC7B8D71E.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx b/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx new file mode 100755 index 0000000000..a73f60982d Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_abd_xdl_cshuffle.hpp.6B55B3AF2C69EA61.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx b/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx new file mode 100755 index 0000000000..980ec74839 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d.hpp.A369BE7D23825A77.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx b/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx new file mode 100755 index 0000000000..774fb1235d Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_layernorm.hpp.1996D4382B7902A4.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx b/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx new file mode 100755 index 0000000000..74c94420ea Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp.6897EB3658A7024F.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx b/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx new file mode 100755 index 0000000000..3b1ebace70 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_multiple_r.hpp.D51C7B3444D17F61.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx b/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx new file mode 100755 index 0000000000..28dd35d2d3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.76B6E1492C6F98CC.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx b/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx new file mode 100755 index 0000000000..1960392bb3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_wmma_cshuffle.hpp.5BE3B8B060B3F7DD.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx new file mode 100755 index 0000000000..a073ccd309 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle.hpp.A234525D7676EE3C.idx differ diff --git a/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx new file mode 100755 index 0000000000..d5c706af3c Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.D18AD937E7A47E2A.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx new file mode 100755 index 0000000000..34046311ab Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp.46B874AA40214074.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx new file mode 100755 index 0000000000..2733245b38 Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp.0EE0667EFBB4A693.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx new file mode 100755 index 0000000000..a45cc88f7d Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp.4B2A61CBEBB69C46.idx differ diff --git a/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx new file mode 100755 index 0000000000..ce3d33aa9b Binary files /dev/null and b/.cache/clangd/index/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp.E45F1434A64DD3E5.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx new file mode 100755 index 0000000000..6cb217abda Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp.35BB7F78B886DD68.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx new file mode 100755 index 0000000000..85b9cc6bb7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.6F38C058A13FF1D3.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx new file mode 100755 index 0000000000..5f051e6ab0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.0B3E6146A79A7808.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx new file mode 100755 index 0000000000..f12a5373a7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.487F7DBAC716F5E6.idx differ diff --git a/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx new file mode 100755 index 0000000000..846dadfcc8 Binary files /dev/null and b/.cache/clangd/index/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.0D311D4C9F7E0828.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx b/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx new file mode 100755 index 0000000000..02cac28c27 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce.hpp.536083621BE180B4.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx new file mode 100755 index 0000000000..3e2ccc81a1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle.hpp.E3AE2B4768599A5C.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx new file mode 100755 index 0000000000..fc1676cf58 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp.7921F720A3C738B2.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx new file mode 100755 index 0000000000..1bdf3a987a Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp.67A4E982DE011DE5.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx new file mode 100755 index 0000000000..f28c28dda1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp.18986AD46051D542.idx differ diff --git a/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx new file mode 100755 index 0000000000..3f30237b4d Binary files /dev/null and b/.cache/clangd/index/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp.93B77417ED344C2F.idx differ diff --git a/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx b/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx new file mode 100755 index 0000000000..12132378d9 Binary files /dev/null and b/.cache/clangd/index/device_gemm_splitk.hpp.CFD1B5B13FAD53CD.idx differ diff --git a/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx b/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx new file mode 100755 index 0000000000..6b7f740fee Binary files /dev/null and b/.cache/clangd/index/device_gemm_streamk.hpp.75A52E82DB5C165B.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx b/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx new file mode 100755 index 0000000000..24298ab10f Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma.hpp.32E8352ACBB1259F.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx new file mode 100755 index 0000000000..faf863067f Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp.48B71A3DBBF3DF70.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx new file mode 100755 index 0000000000..03f25b73ac Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp.9F95D8CF713B4DFB.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx new file mode 100755 index 0000000000..39e7f7f400 Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp.4656D94005983107.idx differ diff --git a/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx new file mode 100755 index 0000000000..117769d3b0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp.92DF213C04CE83E0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx b/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx new file mode 100755 index 0000000000..8f6ffd0d2b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl.hpp.8498EA503EEB08EE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx new file mode 100755 index 0000000000..f2b198cd46 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp.F73590DBD2CB1EF5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx new file mode 100755 index 0000000000..a56065d3cd Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp.544964C8084D9197.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx new file mode 100755 index 0000000000..c34436ac5b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp.6E89B9B9359CE514.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx new file mode 100755 index 0000000000..aa88835956 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp.2346E4E4EAE3DC7F.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx new file mode 100755 index 0000000000..46dd055c5e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp.DCA2A91E3CF88135.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx new file mode 100755 index 0000000000..419aa484c4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp.52C4510F756E0106.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx new file mode 100755 index 0000000000..01a0c17c21 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp.11CF3D24C90FB35A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx new file mode 100755 index 0000000000..2f8932c498 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp.3180F054887BBB56.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx new file mode 100755 index 0000000000..c5e074c84a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp.CA124F6888DE0DFE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx new file mode 100755 index 0000000000..d39e9b15d3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp.5B0B95461D582E5E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx new file mode 100755 index 0000000000..2569757086 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp.144394AC7C436910.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx new file mode 100755 index 0000000000..d101717ed4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp.9C26807545ECBC7F.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx new file mode 100755 index 0000000000..11f9ac7d2d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp.48B6F9C0E69BB63A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx new file mode 100755 index 0000000000..4d96fd6b5c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp.FF4791DC4DC38C18.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx new file mode 100755 index 0000000000..c7d5f8dbae Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp.6D261DF3AD15EA25.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx new file mode 100755 index 0000000000..4cac5b37b1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp.FA9B7FC9807074DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx new file mode 100755 index 0000000000..eced73fe4b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp.E10E580D456628E1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx new file mode 100755 index 0000000000..125ac5b2bd Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp.030C33F297B655BC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx new file mode 100755 index 0000000000..56a26dd6e1 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp.C8A37285FABA85B1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx new file mode 100755 index 0000000000..33cce0cb45 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp.1333260C2CA69DB3.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx new file mode 100755 index 0000000000..278de03dbe Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp.B5F4680BE43A28CE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx new file mode 100755 index 0000000000..fbf55208db Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp.4093668F9B2A46D0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx new file mode 100755 index 0000000000..17e95b747e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp.A50E70F0D93B6F8E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx new file mode 100755 index 0000000000..a3db28d1a6 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp.74A0C6504550F5D4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx new file mode 100755 index 0000000000..97a5cf453c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp.A977355E082D2CA2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx new file mode 100755 index 0000000000..6bb143ea47 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp.2559A4A420F11829.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx new file mode 100755 index 0000000000..60e3075fbf Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp.513C881E10886D27.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx new file mode 100755 index 0000000000..d49faec733 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp.43610B5AFBA5C7DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx new file mode 100755 index 0000000000..17365c9802 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp.448FFB8EF559CCD0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx new file mode 100755 index 0000000000..ee36d671d0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6CE81023C2E130B0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx new file mode 100755 index 0000000000..7dd5f5275e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp.62E7711AB97BA061.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx new file mode 100755 index 0000000000..c5d0c0725e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp.FC50F49FB1ACD418.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx new file mode 100755 index 0000000000..511d1a6ea3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp.29C6735FE1834089.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx new file mode 100755 index 0000000000..8e13281a5c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp.2E316B49437298E2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx new file mode 100755 index 0000000000..13e853f1b7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle.hpp.54159ABC735BA0BB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx new file mode 100755 index 0000000000..70dc378421 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle_lds_direct_load.hpp.C2FB5C5C6E8BC96E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx b/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx new file mode 100755 index 0000000000..8328c74ba6 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_cshuffle_v2.hpp.6D300E1E67F0C8FA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx new file mode 100755 index 0000000000..e4ac81032a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp.9F68ED656E988D30.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx new file mode 100755 index 0000000000..d96f25739d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp.FA79EAE16FB6F44E.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx new file mode 100755 index 0000000000..e9328383f7 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp.F19AC2A6D8DD58E6.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx new file mode 100755 index 0000000000..e00d3f16d3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp.478DBC5276E71817.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx new file mode 100755 index 0000000000..659176bd28 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp.1759B855C9E56265.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx new file mode 100755 index 0000000000..b66c6dbde0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp.9CBE0B752A41740B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx new file mode 100755 index 0000000000..9cec501899 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp.2A9A70463B0B1D69.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx new file mode 100755 index 0000000000..6a62f99a2d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp.E46BEEC813D33535.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx b/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx new file mode 100755 index 0000000000..f7af9a00b2 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_layernorm_cshuffle.hpp.7536F92E23B638A6.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx b/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx new file mode 100755 index 0000000000..278f35dd60 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_skip_b_lds.hpp.31130252724E07CF.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx new file mode 100755 index 0000000000..63246b85bc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle.hpp.D2F306B1868E426C.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx new file mode 100755 index 0000000000..a3fc917b14 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp.C1DEA5CF04C380FB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx new file mode 100755 index 0000000000..68b33656fc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp.5C4328570228C900.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx new file mode 100755 index 0000000000..b78f114d23 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp.8E11BCA1E8B40C54.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx new file mode 100755 index 0000000000..8f8579630d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp.02602D130B49C5E9.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx new file mode 100755 index 0000000000..043cf81f0e Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp.76573A416742936A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx new file mode 100755 index 0000000000..4b8fc1f16d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp.80C5AB0FEC7DFC63.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx new file mode 100755 index 0000000000..dec47fbf6d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp.073EE92A5EAA844A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx new file mode 100755 index 0000000000..abbe2d584a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp.98C08C6647949567.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx new file mode 100755 index 0000000000..603cd1b658 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.436428D0A84C0AFE.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx new file mode 100755 index 0000000000..009a4c3baa Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp.5798AD2D53C823E4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx new file mode 100755 index 0000000000..45ff2bc4b0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp.34C01D1CCA40C5F9.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx new file mode 100755 index 0000000000..2596bf2060 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp.2C6EF29B32E3350B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx new file mode 100755 index 0000000000..1b744d0e9d Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp.483FB0E67DC82A12.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx new file mode 100755 index 0000000000..4dd2a49c96 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp.04904E13881A008D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx new file mode 100755 index 0000000000..7ccdbc93cc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp.BF7C969BEF21A812.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx new file mode 100755 index 0000000000..50edc4f7f2 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp.11676556038472A5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx new file mode 100755 index 0000000000..52d9d83c82 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp.E685A8514CD614D5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx new file mode 100755 index 0000000000..276951e0f0 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp.02D5EB3BF76A9EDC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx new file mode 100755 index 0000000000..794a3c1b0a Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp.235E5C5B653C03E0.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx new file mode 100755 index 0000000000..d065b34c36 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp.37CD0D23BDED400B.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx new file mode 100755 index 0000000000..5f129eb71f Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp.2924213EB866E058.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx new file mode 100755 index 0000000000..1063932150 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp.3FB0D5B09C446777.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx new file mode 100755 index 0000000000..440b0cd74f Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp.7D14F328651204D1.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx new file mode 100755 index 0000000000..217d397ea6 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp.8A9B71EB3B2C8B82.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx new file mode 100755 index 0000000000..974952dadc Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp.CF381829133E9AFB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx new file mode 100755 index 0000000000..4d2b66e4f3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp.117C2517C948C007.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx new file mode 100755 index 0000000000..be942abff4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp.C13174B8631F1EA5.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx new file mode 100755 index 0000000000..3d5815f56c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp.8596E811350866B4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx new file mode 100755 index 0000000000..90d0939061 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp.09DE6214A3D6FEFC.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx new file mode 100755 index 0000000000..829ae55a62 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp.515130D1DB38B8AB.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx new file mode 100755 index 0000000000..5ea6218b0c Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp.252C84373128A03D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx new file mode 100755 index 0000000000..867fcbb5fb Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp.6BA0BD5269CBABBA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx new file mode 100755 index 0000000000..94e3ce438b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp.CFD1881950858A56.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx new file mode 100755 index 0000000000..91d0f1a254 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp.9CC79FF6A90215E2.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx new file mode 100755 index 0000000000..f417033e30 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp.577D62F548B80AD3.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx new file mode 100755 index 0000000000..2075c68304 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp.FC0BE921C33562AA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx new file mode 100755 index 0000000000..29f532be88 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp.5E4A00F1466A4D4D.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx new file mode 100755 index 0000000000..c54d140bd4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp.64CDFD8CF1246F3A.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx new file mode 100755 index 0000000000..c5554e1456 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp.38A1F46E05CA49C4.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx b/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx new file mode 100755 index 0000000000..f55842224b Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp.6DFDC664FAC2C818.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx b/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx new file mode 100755 index 0000000000..e56552be45 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_streamk.hpp.424C1309002226DA.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx b/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx new file mode 100755 index 0000000000..46b6915ed3 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp.16580B63D7D6888C.idx differ diff --git a/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx b/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx new file mode 100755 index 0000000000..213dca0ac4 Binary files /dev/null and b/.cache/clangd/index/device_gemm_xdl_waveletmodel_cshuffle.hpp.6149EF88F8FEFF53.idx differ diff --git a/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx b/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx new file mode 100755 index 0000000000..36d4244faa Binary files /dev/null and b/.cache/clangd/index/device_grouped_contraction_multiple_d.hpp.D8736023CD2927DC.idx differ diff --git a/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx b/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx new file mode 100755 index 0000000000..5ecbcfc770 Binary files /dev/null and b/.cache/clangd/index/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp.A6618A549365EFCF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx new file mode 100755 index 0000000000..af1ae5bec6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.A4DBB4F8B2C78E56.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx new file mode 100755 index 0000000000..c812c861b6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.315EC2A3A6D6E638.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx new file mode 100755 index 0000000000..e0b46eafc2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.AA168C235A47B299.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx new file mode 100755 index 0000000000..9c86645709 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp.FA613A1A341E3811.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx new file mode 100755 index 0000000000..6cf805a8fb Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp.F48B669764788B12.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx new file mode 100755 index 0000000000..f7f0c76b1a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp.B296FF7D04FDFC90.idx differ diff --git a/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx new file mode 100755 index 0000000000..d5f65c9d14 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp.DDED95C8C9D22159.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx new file mode 100755 index 0000000000..251fc60ead Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.15D71B57C7DA4CC3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx new file mode 100755 index 0000000000..5d85d8f702 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.1D323BBA44E4F194.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx new file mode 100755 index 0000000000..fba666d07c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.900C533B3DB605B7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx new file mode 100755 index 0000000000..cb445fed79 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.5AFEEB9815EAB2BB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx new file mode 100755 index 0000000000..bc08aeea6c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.BC2C6EDB175EB210.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx new file mode 100755 index 0000000000..5935c162de Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.8A9EF4FB27AD09E7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx new file mode 100755 index 0000000000..a34e1b08e6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.DE2A54B24DD9CFDF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx new file mode 100755 index 0000000000..f2115b0162 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.EBA14072309C61B4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx new file mode 100755 index 0000000000..95bbb25faa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.5B618BE41EFBBD23.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx new file mode 100755 index 0000000000..bc2186bfb7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.6161CF2711103FB3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx new file mode 100755 index 0000000000..1362c78f92 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.77ED46D094B0B411.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx new file mode 100755 index 0000000000..9b37d375e7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.07FF698C2C4EA5B3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx new file mode 100755 index 0000000000..1e80350eea Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.1FAF079E735BA4BC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx new file mode 100755 index 0000000000..942505d100 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.FD07473382303B16.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx new file mode 100755 index 0000000000..643d64a7aa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.B6F367ED6FE27214.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx new file mode 100755 index 0000000000..0732144486 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.BE1B4F87BC348C49.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx new file mode 100755 index 0000000000..9699169ee8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.F02158140F1CDE9D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx new file mode 100755 index 0000000000..076ed46fd7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.9325EE22E541E919.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx new file mode 100755 index 0000000000..9d5c2ab70a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.BBF8A6B4ABA52D27.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx new file mode 100755 index 0000000000..f444d4b643 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.CE7231939A842BC9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx new file mode 100755 index 0000000000..d55f2b519c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp.5A5CAFE59DAED728.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx new file mode 100755 index 0000000000..6c57d54ea5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp.B9B0155CE4F94655.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx new file mode 100755 index 0000000000..cff8e92891 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp.F1D87E46323F05EC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx new file mode 100755 index 0000000000..43f502c722 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp.B674F131151301E0.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx new file mode 100755 index 0000000000..8ec8059153 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp.E06437315FCA8871.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx new file mode 100755 index 0000000000..34f2304c15 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp.159F0712016AF057.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx new file mode 100755 index 0000000000..162b541d72 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp.4C82308AB1780753.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx new file mode 100755 index 0000000000..73c15dfafc Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp.872D44B53EF52F10.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx new file mode 100755 index 0000000000..d216ce4118 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp.83CDF2CC13C4A415.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx new file mode 100755 index 0000000000..6abf02a098 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp.A4A9E7E93B446A25.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx new file mode 100755 index 0000000000..f1551a5549 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp.729E5DD280A9E536.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx new file mode 100755 index 0000000000..55c6115284 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp.649F61751C48D48D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx new file mode 100755 index 0000000000..cc5dc9dda0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp.8A0C33E6B1E03B1D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx new file mode 100755 index 0000000000..1c122fdd68 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp.8C4CEC479CC69615.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx new file mode 100755 index 0000000000..1953972b69 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp.D16C834304C1756E.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx new file mode 100755 index 0000000000..f2c3bd813c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp.1399C252DECC9B8A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx new file mode 100755 index 0000000000..63bf47e7b9 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp.3060AD718DAF4F4F.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx new file mode 100755 index 0000000000..76ccc1117a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp.8A08D10AF6807EE2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx new file mode 100755 index 0000000000..b066f8d4df Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp.FE154FD36F572C54.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx new file mode 100755 index 0000000000..bf92734dd7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp.56FD421A51C8C5E1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx new file mode 100755 index 0000000000..f56ac5818d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp.C29BF8AC2D59E669.idx differ diff --git a/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx new file mode 100755 index 0000000000..6eb4a8d103 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp.9341E131FE1393D1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx new file mode 100755 index 0000000000..2684502535 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.11490B4C8E752971.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx new file mode 100755 index 0000000000..a49d19b6aa Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.ED0E8CE7C6A61531.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx new file mode 100755 index 0000000000..b008b50fbf Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5ED435B77281FBCD.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx new file mode 100755 index 0000000000..ad5fa11289 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.069AF0356549719D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx new file mode 100755 index 0000000000..02fedb695b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.511C2FDB97E265FC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx new file mode 100755 index 0000000000..a677cb2d62 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.32DD26A1AFC28FC7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx new file mode 100755 index 0000000000..39f1841025 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.72C23D8E55C505F5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx new file mode 100755 index 0000000000..7b08c378f0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.80E125D05CA137B9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx new file mode 100755 index 0000000000..232e13b8fe Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.663BA57C2C8487A1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx new file mode 100755 index 0000000000..d03305666e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.FDD27F96EE937A50.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx new file mode 100755 index 0000000000..5885bec4d7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.33C6E967995C7F50.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx new file mode 100755 index 0000000000..2b6a79c1e8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.59DE1C78AB7D5F80.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx new file mode 100755 index 0000000000..0952cde8b8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.F3F4F9DA4C0F8BDA.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx new file mode 100755 index 0000000000..7246b0fcbd Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.A371B626E37E8E07.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx new file mode 100755 index 0000000000..d69c3185b2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.AB40F2DD408B7367.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx new file mode 100755 index 0000000000..8556d42bbc Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.18F54267E4ABEC46.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx new file mode 100755 index 0000000000..68b38b8a82 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.192FAAC8A46AFF2C.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx new file mode 100755 index 0000000000..1bd12f4cce Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp.F56F1203139F8DB9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx new file mode 100755 index 0000000000..abec3628c9 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.4C04B7436EBF8811.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx new file mode 100755 index 0000000000..54db1ed904 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.D8BA94FD49F7A399.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx new file mode 100755 index 0000000000..21ad6329a1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C14D5D3E331E58A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx new file mode 100755 index 0000000000..15bde06a44 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.066E2D36BE87D012.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx new file mode 100755 index 0000000000..e00d96c3b5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.341F4486D5AB6E5A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx new file mode 100755 index 0000000000..082cd833d7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.5A4CE3D80E2AF506.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx new file mode 100755 index 0000000000..9cc1dfb06b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.403AE32B0F021E71.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx new file mode 100755 index 0000000000..0fe8793841 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.9C8F172FA791F403.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx new file mode 100755 index 0000000000..148099c580 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.2C419A4DCD2F2168.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx new file mode 100755 index 0000000000..a71092c3a3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.217BA51B10EED1BA.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx new file mode 100755 index 0000000000..4fa0958471 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.CE8AF0B7DC8618CC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx new file mode 100755 index 0000000000..2539d7d22c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.8B802336DBA274D4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx new file mode 100755 index 0000000000..aeb9b1607d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.17BBA75937C47055.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx new file mode 100755 index 0000000000..c336939694 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.9C7E5EE7984A7775.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx new file mode 100755 index 0000000000..23a7bbc48f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.15DDB04E41B23392.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx new file mode 100755 index 0000000000..acf2197547 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp.C031B76FFFB0A20D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx new file mode 100755 index 0000000000..933a2277d3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.C7145A60489EBDB5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx new file mode 100755 index 0000000000..fd55c7f849 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C5AE3E9229A8EFB4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx new file mode 100755 index 0000000000..465fc541e4 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp.8F52AF019F02B944.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx new file mode 100755 index 0000000000..113a03e726 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp.3C3BEC3EC7808D5A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx new file mode 100755 index 0000000000..098bb82fe2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.2554570633260D87.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx new file mode 100755 index 0000000000..62ef0954b9 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp.D8F2815AC5D0D7D6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx new file mode 100755 index 0000000000..de60f391d2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp.0FC1DC5FE52A0C60.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx new file mode 100755 index 0000000000..88485d710a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp.0151E7B579E72F93.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx new file mode 100755 index 0000000000..8bad777c3c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp.C058D2A1DEE10686.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx new file mode 100755 index 0000000000..c59c78b58e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp.278F3C311504F9EC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx new file mode 100755 index 0000000000..5481154616 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp.8DE51E7D513808A9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx new file mode 100755 index 0000000000..8d46b8ee94 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp.B3BD1E3E3F6E4877.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx new file mode 100755 index 0000000000..fc6f51d6da Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3955AFFBF9D5A2EF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx new file mode 100755 index 0000000000..ea24c85695 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp.C97D071C6295D5F3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx new file mode 100755 index 0000000000..6f7643f8d8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp.E621A4FA60DE216B.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx new file mode 100755 index 0000000000..aa7a7231bd Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp.06F3665DC4884ADD.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx new file mode 100755 index 0000000000..2ae0b599b1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp.8A48A77FE8BA877A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx new file mode 100755 index 0000000000..c72734627f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp.85809BA7EA818954.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx new file mode 100755 index 0000000000..06ac96331e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.FB2E0A9CD009454C.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx new file mode 100755 index 0000000000..337051c91a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.21AF094B4B66A48D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx new file mode 100755 index 0000000000..c69162528d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.D78BE8BB81161A14.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx new file mode 100755 index 0000000000..3194548487 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.143C90F3DEDC2341.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx new file mode 100755 index 0000000000..7fd04b4823 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp.426F56794F68AE44.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx new file mode 100755 index 0000000000..b92e640c5b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp.1C2FE2C88C10C9D6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx new file mode 100755 index 0000000000..1241ecb562 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp.BB1A75845E7DE245.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx new file mode 100755 index 0000000000..31fa84a2ec Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp.A1E84AD887D755DB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx new file mode 100755 index 0000000000..a81cda35c3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.43CB1A6BDABED53D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx new file mode 100755 index 0000000000..6ecb7e5ea7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp.336E2C49F8565116.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx new file mode 100755 index 0000000000..fa4464ae03 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.117E24CA3DC21528.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx new file mode 100755 index 0000000000..a5ea514f8b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.59CFAC251237A8A2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx new file mode 100755 index 0000000000..f3e1cdc272 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B76F39C8DBBE2FD2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx new file mode 100755 index 0000000000..cb5d1e0def Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.9F991AFB3119C4B9.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx new file mode 100755 index 0000000000..dde2eb592d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.A3477CD2DF481BB7.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx new file mode 100755 index 0000000000..2e9354759f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.C60AD9A6203D4F40.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx new file mode 100755 index 0000000000..0ea6f44940 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.B74C847DC291EB3D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx new file mode 100755 index 0000000000..702365b4ce Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.91739D7EC5D8C4E5.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx new file mode 100755 index 0000000000..62eecf9d13 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.612D072BD9DA9C78.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx new file mode 100755 index 0000000000..67a6b90b1c Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.91C96AAC7F5772DF.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx new file mode 100755 index 0000000000..b256627d32 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.A06BD8EFCD977C2D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx new file mode 100755 index 0000000000..92f36103e5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp.0F35369ACDDEB6D2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx new file mode 100755 index 0000000000..9fe62fba00 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp.3DF5C2CC3145E799.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx new file mode 100755 index 0000000000..7e603dfdd0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp.B3BE2F1E169224A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx new file mode 100755 index 0000000000..0140ef4eb2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp.ED09CF7C1628C3B6.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx new file mode 100755 index 0000000000..1a8bdc1f85 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d.hpp.CB0E4FDBA2354B83.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx new file mode 100755 index 0000000000..fb08a94c3a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp.62AEEAEB4FE47662.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx new file mode 100755 index 0000000000..762868bb73 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp.DA7CD25895EF8940.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx new file mode 100755 index 0000000000..feb8f66eca Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_f16_instance.hpp.FE4A66EFBF344CEC.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx new file mode 100755 index 0000000000..b40a34ad4a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_wmma_i8_instance.hpp.22BF4E121F8B1D63.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx new file mode 100755 index 0000000000..146798544a Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp.8935210F01B214E2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx new file mode 100755 index 0000000000..ef82df0d6f Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_instance.hpp.7A273F666FE77357.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx new file mode 100755 index 0000000000..d25ebcfe9b Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_data_xdl_scale_instance.hpp.A306BD940C6CC8E3.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx new file mode 100755 index 0000000000..831fc9e040 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight.hpp.FB767EE60994A05E.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx new file mode 100755 index 0000000000..b627f54ffe Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp.2CEBA1D92F4EDC1A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx new file mode 100755 index 0000000000..e926f551d0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_wmma_instance.hpp.D8A887BE715EE7ED.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx new file mode 100755 index 0000000000..295f53117d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp.7B0144E3F93B6769.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx new file mode 100755 index 0000000000..8c93763ad6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_bwd_weight_xdl_instance.hpp.DF771DCE13775534.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx new file mode 100755 index 0000000000..d1baa52097 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd.hpp.ED9472BD269F359D.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx new file mode 100755 index 0000000000..cb3f51f4ff Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp.52A5FB3B86490259.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx new file mode 100755 index 0000000000..9d26b8e93e Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d.hpp.79D64EC0672BD2D2.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx new file mode 100755 index 0000000000..cc53f0da45 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r.hpp.4DCB681EEBE1DA60.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx new file mode 100755 index 0000000000..33afdfd040 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp.CF5C02CB8B4910C1.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx new file mode 100755 index 0000000000..a878430c41 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp.EC51DA2544106248.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx b/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx new file mode 100755 index 0000000000..47c0b2e133 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_wmma_instance.hpp.377DCBF55D8BBCBB.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx new file mode 100755 index 0000000000..0ef24882f5 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_bilinear_instance.hpp.4827161718A2E399.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx new file mode 100755 index 0000000000..44b29922f7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_instance.hpp.DE209B7BB2124A4A.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx new file mode 100755 index 0000000000..215542b142 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scale_instance.hpp.F388F6485863F7A4.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx new file mode 100755 index 0000000000..a36df96b39 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp.043570DA9A94B804.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx new file mode 100755 index 0000000000..bfb90c0a14 Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp.8003C44E683CA082.idx differ diff --git a/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx b/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx new file mode 100755 index 0000000000..10ea0f430d Binary files /dev/null and b/.cache/clangd/index/device_grouped_conv_utils.hpp.82861A708A167591.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx b/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx new file mode 100755 index 0000000000..ab02bf01c8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm.hpp.96241388E9D6D4CD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx new file mode 100755 index 0000000000..6f6f303c6c Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp.B5F90340138DCBDD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx new file mode 100755 index 0000000000..a084a82038 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp.BC9F6A61DFACFE50.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx new file mode 100755 index 0000000000..f532a55879 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.F7CCE132C2B5DC0C.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx new file mode 100755 index 0000000000..b70f0cc02a Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.9DF3794E443669AC.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx b/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx new file mode 100755 index 0000000000..3feaab5c0c Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_fixed_nk.hpp.6F8F8F18341308F9.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx new file mode 100755 index 0000000000..400472e974 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute.hpp.3C2429E11C534BCE.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx new file mode 100755 index 0000000000..de53f1b720 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp.75AE4D40F988002C.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx b/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx new file mode 100755 index 0000000000..77f39df683 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_splitk.hpp.6F08C0BD625D58A2.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx b/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx new file mode 100755 index 0000000000..721a30ab4e Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl.hpp.46AD692E57980C90.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx new file mode 100755 index 0000000000..7afaf0be7d Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp.37D37DC699ECCA33.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx new file mode 100755 index 0000000000..31e701f679 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp.1D6C6BE3C31CA169.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx new file mode 100755 index 0000000000..51d7446a6b Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp.24FD07A2963099BE.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx new file mode 100755 index 0000000000..951c490186 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp.32B3D11C83B5AEA6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx new file mode 100755 index 0000000000..9eb6fecae2 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk.hpp.4C6A715EB09F77E1.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx new file mode 100755 index 0000000000..1fda3304ad Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp.47280D4E92EC5D1F.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx new file mode 100755 index 0000000000..0a9211148c Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp.1DAF74A63A4FD2D6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx new file mode 100755 index 0000000000..6364304ca8 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp.CD8E0FEB6A298EEB.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx new file mode 100755 index 0000000000..fbedd0aa52 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp.C65F333685492D7B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx new file mode 100755 index 0000000000..5e6a50f614 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp.D0AB01FA41C26CA6.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx new file mode 100755 index 0000000000..5b2e2077d0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp.43793441438DD042.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx new file mode 100755 index 0000000000..46c89fe115 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp.2F31492B9345223B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx new file mode 100755 index 0000000000..ac50a242f6 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp.A7598EAA8CFFD492.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx new file mode 100755 index 0000000000..3f7a4605f3 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp.FC712CF5F768A94B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx new file mode 100755 index 0000000000..b12e081ff0 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp.6FC4AF50AF38A41A.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx new file mode 100755 index 0000000000..ba17cc6860 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp.B22CC0B7C42EE2B4.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx new file mode 100755 index 0000000000..a16ffc18d7 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp.E11E98EDBDF90D0B.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx new file mode 100755 index 0000000000..c85d894aca Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_cshuffle.hpp.CE155CFA093515AD.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx new file mode 100755 index 0000000000..62b2281895 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp.5121BAECA46B0C23.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx new file mode 100755 index 0000000000..d78992b5c1 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp.1597C09D10EC33F9.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx new file mode 100755 index 0000000000..264093ef7c Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp.FBCAD14B8CE81D3F.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx new file mode 100755 index 0000000000..2f7adb5724 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp.F03BAFFDB1120A94.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx new file mode 100755 index 0000000000..cd64544a59 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp.EADFE46F5C6C50DA.idx differ diff --git a/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx new file mode 100755 index 0000000000..75b128e083 Binary files /dev/null and b/.cache/clangd/index/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp.5656416FBF150D8D.idx differ diff --git a/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx b/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx new file mode 100755 index 0000000000..fea89b1c7c Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_bwd_data_f32_instance.cpp.6CBCB642104CF767.idx differ diff --git a/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx b/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx new file mode 100755 index 0000000000..0315564222 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_bwd_gamma_beta_f32_instance.cpp.59A23F12AA5E3659.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx b/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx new file mode 100755 index 0000000000..3b94e1bd94 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_f16_instance.cpp.E5229B139F1A9B82.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx b/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx new file mode 100755 index 0000000000..8c15a75171 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_f32_instance.cpp.D9D1D3F585AA6CD7.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx new file mode 100755 index 0000000000..c72d19680b Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp.4F6E8B2271D69F1C.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx new file mode 100755 index 0000000000..47f64cc623 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f16_instance.cpp.3151733ECD55CFAB.idx differ diff --git a/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx b/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx new file mode 100755 index 0000000000..2c235eb514 Binary files /dev/null and b/.cache/clangd/index/device_groupnorm_fwd_swish_f32_instance.cpp.7C71C8C8383AE2D3.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx b/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx new file mode 100755 index 0000000000..e841ec95cf Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gndhwc_3d_instance.cpp.B02146942667831D.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx b/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx new file mode 100755 index 0000000000..88a61b4431 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gnhwc_2d_instance.cpp.BAF41643A25FDB03.idx differ diff --git a/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx b/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx new file mode 100755 index 0000000000..7c4189bc59 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_gnwc_1d_instance.cpp.3EF12405C93313FD.idx differ diff --git a/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx b/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx new file mode 100755 index 0000000000..3f4eb285b7 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_impl.hpp.C46D2CA10F5E5642.idx differ diff --git a/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx b/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx new file mode 100755 index 0000000000..f0ad3d5929 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_instance.hpp.94D5335C0371893F.idx differ diff --git a/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx b/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx new file mode 100755 index 0000000000..dcce595072 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_ndhwgc_3d_instance.cpp.709987A54287EE7C.idx differ diff --git a/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx b/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx new file mode 100755 index 0000000000..57d39235b9 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_nhwgc_2d_instance.cpp.E16A024C295F734A.idx differ diff --git a/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx b/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx new file mode 100755 index 0000000000..adf441a213 Binary files /dev/null and b/.cache/clangd/index/device_image_to_column_nwgc_1d_instance.cpp.D56D4C0735B48980.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx b/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx new file mode 100755 index 0000000000..dd36674a66 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_data_f16_instance.cpp.7967A43F40615422.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx b/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx new file mode 100755 index 0000000000..b34215b662 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_data_f32_instance.cpp.BCCFAE80AA603EB7.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx new file mode 100755 index 0000000000..c0a57a4b0c Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp.756B8C951E2AA179.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx new file mode 100755 index 0000000000..0e6743a5b6 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp.4E9CDD5C09C49289.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx b/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx new file mode 100755 index 0000000000..f2b90c03ce Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_fwd_f16_instance.cpp.572C171AE3FAAC3B.idx differ diff --git a/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx b/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx new file mode 100755 index 0000000000..e959ff7210 Binary files /dev/null and b/.cache/clangd/index/device_layernorm2d_fwd_f32_instance.cpp.C34D1A2542CE24BA.idx differ diff --git a/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx b/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx new file mode 100755 index 0000000000..737cacba46 Binary files /dev/null and b/.cache/clangd/index/device_layernorm4d_fwd_f16_instance.cpp.4D368D0AFAADE23F.idx differ diff --git a/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx b/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx new file mode 100755 index 0000000000..be7f2f597b Binary files /dev/null and b/.cache/clangd/index/device_layernorm4d_fwd_f32_instance.cpp.82CA4512D672E7A2.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx new file mode 100755 index 0000000000..93072285a1 Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp.59F686652075C1F3.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx new file mode 100755 index 0000000000..ade6870ab9 Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f16_instance.cpp.075B2EC4AC466F2A.idx differ diff --git a/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx new file mode 100755 index 0000000000..4516486081 Binary files /dev/null and b/.cache/clangd/index/device_max_pool3d_fwd_ndhwc_f32_instance.cpp.2CC50FB6EE948F2B.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx b/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx new file mode 100755 index 0000000000..5e869eceeb Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd.hpp.67E96E50742F2EC4.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx b/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx new file mode 100755 index 0000000000..975b9b121b Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_bf16_instance.cpp.142DD51C4CE34BA2.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx b/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx new file mode 100755 index 0000000000..64d336ead6 Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_f16_instance.cpp.DCBDF5FDAE579133.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx b/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx new file mode 100755 index 0000000000..002db4dc9d Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_f32_instance.cpp.EA3D33FB1230F655.idx differ diff --git a/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx b/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx new file mode 100755 index 0000000000..7d0381ceed Binary files /dev/null and b/.cache/clangd/index/device_max_pool_bwd_impl.hpp.85737AA1C70FEFBE.idx differ diff --git a/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx b/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx new file mode 100755 index 0000000000..f7bbf88908 Binary files /dev/null and b/.cache/clangd/index/device_memory.cpp.A744015D851F9812.idx differ diff --git a/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx b/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx new file mode 100755 index 0000000000..3368d3eba4 Binary files /dev/null and b/.cache/clangd/index/device_memory.hpp.C2F2D98A33C55529.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx b/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx new file mode 100755 index 0000000000..334a2253ff Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce.hpp.188F02C49C64E773.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx b/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx new file mode 100755 index 0000000000..69f1d2e19c Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce_multiblock.hpp.BAAA7F55A7B0C61A.idx differ diff --git a/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx b/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx new file mode 100755 index 0000000000..6104a15901 Binary files /dev/null and b/.cache/clangd/index/device_multiple_reduce_threadwise.hpp.956BE9BDF24D2435.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx b/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx new file mode 100755 index 0000000000..1168b13c40 Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_data.hpp.1BAA9B467AEFC28F.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx b/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx new file mode 100755 index 0000000000..f4e3137488 Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_data_impl.hpp.A627060CAA69A19F.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx b/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx new file mode 100755 index 0000000000..9ae7a53839 Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_gamma_beta.hpp.78751175103676B4.idx differ diff --git a/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx b/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx new file mode 100755 index 0000000000..1e64a0d39b Binary files /dev/null and b/.cache/clangd/index/device_normalization_bwd_gamma_beta_impl.hpp.A5BD375EAE722F5E.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx b/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx new file mode 100755 index 0000000000..d1f37b0c58 Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd.hpp.DBDBAF89813C337F.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx b/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx new file mode 100755 index 0000000000..cba7f05b52 Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd_impl.hpp.2930CBBD3A2BF4E9.idx differ diff --git a/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx b/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx new file mode 100755 index 0000000000..5de48554c5 Binary files /dev/null and b/.cache/clangd/index/device_normalization_fwd_splitk_impl.hpp.6FD53387232480CC.idx differ diff --git a/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx b/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx new file mode 100755 index 0000000000..90d0ef9eaa Binary files /dev/null and b/.cache/clangd/index/device_normalize_instance.cpp.FD40702354DAF5AA.idx differ diff --git a/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx b/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx new file mode 100755 index 0000000000..88199850a1 Binary files /dev/null and b/.cache/clangd/index/device_operation_instance_factory.hpp.115B69A1EE76F903.idx differ diff --git a/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx b/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx new file mode 100755 index 0000000000..37c5147034 Binary files /dev/null and b/.cache/clangd/index/device_permute.hpp.47CCE84C97E4E9FC.idx differ diff --git a/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx b/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx new file mode 100755 index 0000000000..9e44096662 Binary files /dev/null and b/.cache/clangd/index/device_permute_impl.hpp.BEE7C34D1958CA2F.idx differ diff --git a/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx b/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx new file mode 100755 index 0000000000..f245650045 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_1d_instances.cpp.7EB32958B0512D73.idx differ diff --git a/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx b/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx new file mode 100755 index 0000000000..372d2d9f3a Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_2d_instances.cpp.2B80C1D2C455DED0.idx differ diff --git a/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx b/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx new file mode 100755 index 0000000000..4a8461f376 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_3d_instances.cpp.07A2A609833FE4D0.idx differ diff --git a/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx b/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx new file mode 100755 index 0000000000..bd80d15cc7 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_4d_instances.cpp.18341306CF0F5EFD.idx differ diff --git a/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx b/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx new file mode 100755 index 0000000000..fd8b9d1bd5 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_5d_instances.cpp.81367B36B3C16096.idx differ diff --git a/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx b/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx new file mode 100755 index 0000000000..965f8362fa Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_6d_instances.cpp.A19C7755BA5CCAF6.idx differ diff --git a/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx b/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx new file mode 100755 index 0000000000..c7a1340fb6 Binary files /dev/null and b/.cache/clangd/index/device_permute_scale_instances.hpp.6002ED9F3C6C6BA1.idx differ diff --git a/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx b/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx new file mode 100755 index 0000000000..416eadca25 Binary files /dev/null and b/.cache/clangd/index/device_pool2d_fwd_nhwc_nhwc.hpp.8F42F3A58241D669.idx differ diff --git a/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx b/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx new file mode 100755 index 0000000000..cdefc86378 Binary files /dev/null and b/.cache/clangd/index/device_pool3d_fwd_ndhwc_ndhwc.hpp.821043598893E6B4.idx differ diff --git a/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx b/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx new file mode 100755 index 0000000000..8f68847155 Binary files /dev/null and b/.cache/clangd/index/device_pool_fwd.hpp.048422E56031C936.idx differ diff --git a/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx b/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx new file mode 100755 index 0000000000..bdae4b0f91 Binary files /dev/null and b/.cache/clangd/index/device_prop.hpp.51F1A892B5410B39.idx differ diff --git a/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx b/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx new file mode 100755 index 0000000000..eeec123aeb Binary files /dev/null and b/.cache/clangd/index/device_put_element.hpp.50E2F2788BD7B2E7.idx differ diff --git a/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx b/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx new file mode 100755 index 0000000000..dabf15e66a Binary files /dev/null and b/.cache/clangd/index/device_put_element_impl.hpp.CF1E04C6FC237429.idx differ diff --git a/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx b/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx new file mode 100755 index 0000000000..9b56af3141 Binary files /dev/null and b/.cache/clangd/index/device_reduce.hpp.17BAD15A3D93B7FE.idx differ diff --git a/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx b/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx new file mode 100755 index 0000000000..fc44ece303 Binary files /dev/null and b/.cache/clangd/index/device_reduce_common.hpp.CAE83A095739383D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx b/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx new file mode 100755 index 0000000000..f7b8f7d676 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance.hpp.15BDAE27A6CA281A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx b/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx new file mode 100755 index 0000000000..1163855318 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise.hpp.C5FE82438250FD9A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx new file mode 100755 index 0000000000..766a32987a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.cpp.A1938EEEABE8B42A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx new file mode 100755 index 0000000000..fd2943c971 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_add.hpp.20070C9325736FAE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx new file mode 100755 index 0000000000..c11e67eac4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp.3CFF0C60C7399DEC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx new file mode 100755 index 0000000000..769ad2f0b5 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp.8C9F3AF79C3259FF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx new file mode 100755 index 0000000000..f6dee36658 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp.39D9A35BA78EDFCA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx new file mode 100755 index 0000000000..87dc5e1005 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp.D3804A89BDDBD68A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx new file mode 100755 index 0000000000..94149a0e5e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.cpp.26EBD8A22938C32C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx new file mode 100755 index 0000000000..136e14fecd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_max.hpp.EFFF67751118917D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx new file mode 100755 index 0000000000..8cb9a23cf8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.cpp.82123925B2E47DE5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx new file mode 100755 index 0000000000..d23c3412ef Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_min.hpp.1AFBE8401A0BB727.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx new file mode 100755 index 0000000000..2700fb7e29 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp.4AE84C1155A0BD16.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx new file mode 100755 index 0000000000..724964577d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp.E3EB62B3883D54D2.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx new file mode 100755 index 0000000000..ee74385df8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp.116108BDF1BABC19.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx new file mode 100755 index 0000000000..1f83b89cfb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp.9716CDC40BFB2514.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx new file mode 100755 index 0000000000..6b582a00cf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.cpp.F4D8C3EC38AC4A06.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx new file mode 100755 index 0000000000..dd84c05a48 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_max.hpp.E4FC0C2AEABCC117.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx new file mode 100755 index 0000000000..ee8bac9876 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.cpp.8D16D62A0EBE3E6E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx new file mode 100755 index 0000000000..03b13083da Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f16_f16_min.hpp.4404F6E1807E5BD4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx new file mode 100755 index 0000000000..a401a5d923 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.cpp.6312AF648C08A121.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx new file mode 100755 index 0000000000..4179663f06 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_add.hpp.70C411CE7496D684.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx new file mode 100755 index 0000000000..8a8042ffc3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp.BB89842592147883.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx new file mode 100755 index 0000000000..61651ed42a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp.08DE17CE7004DBDC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx new file mode 100755 index 0000000000..bc4534e662 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp.15C86B2B21F27130.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx new file mode 100755 index 0000000000..d004a4b8a4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp.152CF392CC9366CD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx new file mode 100755 index 0000000000..70fcd9199e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.cpp.07FADC8850641162.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx new file mode 100755 index 0000000000..eaacfdf458 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_add.hpp.B81990FE8C5D8509.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx new file mode 100755 index 0000000000..c6b48fa578 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp.562409E747DBF8A8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx new file mode 100755 index 0000000000..b9aa906233 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp.DF4FA5048878CC52.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx new file mode 100755 index 0000000000..abc24bb695 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp.F349F4E1672B2959.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx new file mode 100755 index 0000000000..a855882109 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp.1B257007DC8DF36B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx new file mode 100755 index 0000000000..503ed85279 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.cpp.58C6C0B25D770B51.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx new file mode 100755 index 0000000000..8774b23bac Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_max.hpp.9E23A16BC92C20DB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx new file mode 100755 index 0000000000..70a299bdc4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.cpp.0B95E75A805DE595.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx new file mode 100755 index 0000000000..51496e3be0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_min.hpp.B0727F2EA98E1F00.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx new file mode 100755 index 0000000000..635672651e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp.FA19AE81B0ABEEED.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx new file mode 100755 index 0000000000..e8c4241da3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp.7E4B9D2EA0AA55ED.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx new file mode 100755 index 0000000000..6d8f0986c3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.cpp.B429C5C7F2FA95D4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx new file mode 100755 index 0000000000..2bf92945da Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_add.hpp.5BB1B8BA3E60EF71.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx new file mode 100755 index 0000000000..3c55d86c0b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp.CFF7599DE0E79FBB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx new file mode 100755 index 0000000000..8d08b13305 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp.46A8C8C2BE2BA19E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx new file mode 100755 index 0000000000..ef168aa651 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp.384AFFA95058603D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx new file mode 100755 index 0000000000..cfc3cd50fd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp.54BEF21DD1A84A95.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx new file mode 100755 index 0000000000..8bd404ad4f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.cpp.213C81B0FE4954D8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx new file mode 100755 index 0000000000..5ce0b637cf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_add.hpp.496DCAD6F6453198.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx new file mode 100755 index 0000000000..5c08ed1331 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp.13D835E23741A152.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx new file mode 100755 index 0000000000..7f1a31c7df Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp.2472648101515DFA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx new file mode 100755 index 0000000000..8ee9cb2f22 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp.11BCD7B99E3C6DB4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx new file mode 100755 index 0000000000..ce4c28b936 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp.E5A6055D6ADB74AC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx new file mode 100755 index 0000000000..9cd1a30238 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.cpp.3646CB646CAA9902.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx new file mode 100755 index 0000000000..bd26efe70c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_max.hpp.58F762D7856C2DD8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx new file mode 100755 index 0000000000..f3ece7947d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.cpp.1EAEE023D4C34351.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx new file mode 100755 index 0000000000..13f5d3bc6b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_min.hpp.94217680DDE41BC8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx new file mode 100755 index 0000000000..868f24e5f3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp.5276760A8102F515.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx new file mode 100755 index 0000000000..0034e55c5e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp.52EC3DCBFC6F1222.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx new file mode 100755 index 0000000000..b46a273e0e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.cpp.BC429E8BCB688FC0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx new file mode 100755 index 0000000000..205092d6d9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_add.hpp.29384EA1BE892314.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx new file mode 100755 index 0000000000..aa5be541b4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp.C4B64A596D6DF817.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx new file mode 100755 index 0000000000..d0a3c592eb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp.615A6DAAC49A37A5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx new file mode 100755 index 0000000000..5d1ecb6961 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp.9FC33F8197771FF0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx new file mode 100755 index 0000000000..9e41d8b46c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp.CF7CEDDC624CFFAF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx new file mode 100755 index 0000000000..97119d91a6 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.cpp.20716445BE6CBC5D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx new file mode 100755 index 0000000000..e9f0ca96d0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_max.hpp.A0110584C215CD9C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx new file mode 100755 index 0000000000..d9101b8dae Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.cpp.D8BEF9441B9F4BC8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx new file mode 100755 index 0000000000..30a528ffa3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_blockwise_i8_i8_i8_min.hpp.8F13163D01C8AE8F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx b/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx new file mode 100755 index 0000000000..c3b24698cf Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_impl_common.hpp.F2201C19BAE9FFE8.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx new file mode 100755 index 0000000000..a457c7c5b8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add.hpp.F65D86E6C84EDA60.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx new file mode 100755 index 0000000000..24fe3dcfc0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp.5E6D995E61ADA8BC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx new file mode 100755 index 0000000000..e1e0b187bc Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp.510427928F98A02B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx new file mode 100755 index 0000000000..7695580fea Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp.54085188C8447423.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx new file mode 100755 index 0000000000..3a017998f9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp.0ADD6329411B78B1.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx new file mode 100755 index 0000000000..2659067865 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp.4393525F2F4E8DA5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx new file mode 100755 index 0000000000..3149ea0aa4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp.21E4CBB422BF8D82.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx new file mode 100755 index 0000000000..95fe520793 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp.4BD601E4D7DE370C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx new file mode 100755 index 0000000000..3fc5753205 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp.619ED16F95775BEA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx new file mode 100755 index 0000000000..973d3cf994 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp.873C6FA8487AF602.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx new file mode 100755 index 0000000000..ffba7fd45c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp.DA5C10F93175C08B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx new file mode 100755 index 0000000000..b92535801b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp.A62B76883CD2E814.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx new file mode 100755 index 0000000000..06f0becd22 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp.82487B9F10609357.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx new file mode 100755 index 0000000000..24d95cc28b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp.979C0C60ED1AF7B3.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx new file mode 100755 index 0000000000..5c5c8da3ab Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp.16AAA28D16A62BE9.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx new file mode 100755 index 0000000000..841c9cb237 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp.3DCB7674F6E295A2.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx new file mode 100755 index 0000000000..e7ed5b5975 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp.3C8AE5F8E636FD38.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx new file mode 100755 index 0000000000..4101a3702a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp.EC103046C3FCC222.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx new file mode 100755 index 0000000000..bcee92fb44 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp.EC5F0E8B3A5B73CB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx new file mode 100755 index 0000000000..b248cc730c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp.F69FD80D45178C79.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx new file mode 100755 index 0000000000..2f8bfb7638 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp.F6C8C49B5460F242.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx b/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx new file mode 100755 index 0000000000..00f79324f4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise.hpp.C398C657987C23EC.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx new file mode 100755 index 0000000000..0ae2411395 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.cpp.7EFDD1FE32B6F3E3.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx new file mode 100755 index 0000000000..ad4959b9df Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_add.hpp.2D7F2A668C85C670.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx new file mode 100755 index 0000000000..42b90a2dc4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp.DCF9F78C2573F20F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx new file mode 100755 index 0000000000..e2016d3792 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp.E64E06F9E993015F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx new file mode 100755 index 0000000000..5369d9d439 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp.71BA7AE5BF315EB4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx new file mode 100755 index 0000000000..0101b16761 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp.5814F92CA6CF0198.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx new file mode 100755 index 0000000000..32f80edd74 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.cpp.D9E364199F88743A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx new file mode 100755 index 0000000000..afb82de642 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_max.hpp.B8A4A04F978ABC79.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx new file mode 100755 index 0000000000..694c861797 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.cpp.B60965F1C97196EF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx new file mode 100755 index 0000000000..421d4c6def Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_min.hpp.97362179A977A149.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx new file mode 100755 index 0000000000..8d4f2c8f67 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp.C510348DB0E9EAF4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx new file mode 100755 index 0000000000..434021860b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp.B82C759E6266A9D7.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx new file mode 100755 index 0000000000..b3221458b7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp.6F6EC6F8F6066B26.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx new file mode 100755 index 0000000000..9b90a662be Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp.2B3A1DBDB134572E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx new file mode 100755 index 0000000000..5d714d2d37 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.cpp.501586DF17B9409B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx new file mode 100755 index 0000000000..0732c7d853 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_max.hpp.EA77360039638CEE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx new file mode 100755 index 0000000000..a2b08f506d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.cpp.F85FE38D0DCF8C44.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx new file mode 100755 index 0000000000..7a4c330698 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f16_f16_min.hpp.D976B4CDF936A58A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx new file mode 100755 index 0000000000..92ab9dd705 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.cpp.630A4073E2B46FEA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx new file mode 100755 index 0000000000..574fac2616 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_add.hpp.7BFEE24C60F74EC6.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx new file mode 100755 index 0000000000..7b9af3c032 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp.BB154BAE452FBECD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx new file mode 100755 index 0000000000..7e2046bb16 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp.442706BD47327B2E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx new file mode 100755 index 0000000000..261169ee21 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp.01982D48C410CB47.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx new file mode 100755 index 0000000000..c66bbd1d4b Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp.D90C9C6069808C78.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx new file mode 100755 index 0000000000..167020fde3 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.cpp.30899F9F46343B0E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx new file mode 100755 index 0000000000..cb085c1db7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_add.hpp.3C1359A00C377B18.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx new file mode 100755 index 0000000000..6aabacf373 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp.F787647EC7B1DE3C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx new file mode 100755 index 0000000000..f818c5f0ed Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp.2ABD3DA274EED7C0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx new file mode 100755 index 0000000000..662b3b08bd Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp.0D0748D60784A612.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx new file mode 100755 index 0000000000..5ac69dceda Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp.F51F72472293693C.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx new file mode 100755 index 0000000000..ac553b8d2d Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.cpp.35DDEC8F63798EAB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx new file mode 100755 index 0000000000..e78e942fb0 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_max.hpp.4E3093BB6320EF1D.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx new file mode 100755 index 0000000000..6e2a4c1515 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.cpp.1CE993853CF695D0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx new file mode 100755 index 0000000000..1263045711 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_min.hpp.448A5294C15B52AE.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx new file mode 100755 index 0000000000..b88cc62334 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp.F71A95723F67A7AA.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx new file mode 100755 index 0000000000..33a9defdc9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp.1D44113CF201458F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx new file mode 100755 index 0000000000..8621f29427 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.cpp.DBE748926DFEDB8B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx new file mode 100755 index 0000000000..a7dbc3480e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_add.hpp.55EEA55DB1346E7B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx new file mode 100755 index 0000000000..01f696025a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp.E1314404E465BA33.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx new file mode 100755 index 0000000000..fe519a6aad Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp.D9B461C00E68A892.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx new file mode 100755 index 0000000000..e5cff3fd00 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp.C0DCB6010E545A4E.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx new file mode 100755 index 0000000000..fdd7fad76f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp.082457B5EE500DB0.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx new file mode 100755 index 0000000000..794d9a1782 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.cpp.FF7D2E3E29E0477B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx new file mode 100755 index 0000000000..5f16201bea Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_add.hpp.774FB922ABF22CDD.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx new file mode 100755 index 0000000000..1b63a378e8 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp.095F5D6B26D2F29A.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx new file mode 100755 index 0000000000..479067d58c Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp.5FCEC88A65C41974.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx new file mode 100755 index 0000000000..a1e80fa3e1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp.BB5B12454481FF04.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx new file mode 100755 index 0000000000..ae3487e2bb Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp.7B5AB5E79B93DFFB.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx new file mode 100755 index 0000000000..6c63edcd6e Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.cpp.BAF3E66A0EDF3C75.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx new file mode 100755 index 0000000000..b5132a9be5 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_max.hpp.390E8CBCF4E86ABF.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx new file mode 100755 index 0000000000..6714bc7344 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.cpp.778D73C35496348B.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx new file mode 100755 index 0000000000..2307d37046 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_min.hpp.2C7338E3132300C6.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx new file mode 100755 index 0000000000..e0a9743cf2 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp.59008E97BEF36435.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx new file mode 100755 index 0000000000..d60fd8507f Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp.4329A8B0483B68A4.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx new file mode 100755 index 0000000000..e26d1ea370 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.cpp.509D27379633375F.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx new file mode 100755 index 0000000000..96731ede63 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_add.hpp.7F575A4F4970CC92.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx new file mode 100755 index 0000000000..de1ee93dc1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp.BBCEE5A5D01A00A5.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx new file mode 100755 index 0000000000..ba1a25667a Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp.FDD16FEA381DDE62.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx new file mode 100755 index 0000000000..e87b3c98f7 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp.C6B6423A4D3C4156.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx new file mode 100755 index 0000000000..408cd30630 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp.F8ED464DDA5BD732.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx new file mode 100755 index 0000000000..9042dbabf4 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.cpp.C1E70E7984041F16.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx new file mode 100755 index 0000000000..f90de1f4f1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_max.hpp.1F0CCA2641B90428.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx new file mode 100755 index 0000000000..d640fb62c9 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.cpp.05E7FDBEF4E2FEA1.idx differ diff --git a/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx new file mode 100755 index 0000000000..759a6f7d17 Binary files /dev/null and b/.cache/clangd/index/device_reduce_instance_threadwise_i8_i8_i8_min.hpp.078E8B6FD22E7DA4.idx differ diff --git a/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx b/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx new file mode 100755 index 0000000000..e7dd55f72e Binary files /dev/null and b/.cache/clangd/index/device_reduce_multiblock.hpp.C0B69E5EA2AC7CE7.idx differ diff --git a/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx b/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx new file mode 100755 index 0000000000..7ff39115d1 Binary files /dev/null and b/.cache/clangd/index/device_reduce_threadwise.hpp.71DBCBAA668DE14B.idx differ diff --git a/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx b/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx new file mode 100755 index 0000000000..2799d7a00b Binary files /dev/null and b/.cache/clangd/index/device_softmax.hpp.497B752134CE80EA.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx new file mode 100755 index 0000000000..2d715ac5c2 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.cpp.6DE6521393D18F5C.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx new file mode 100755 index 0000000000..db413c0392 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce1.hpp.B94CF584D85EABE4.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx new file mode 100755 index 0000000000..abc0470094 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.cpp.6002EE43735BD2AD.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx new file mode 100755 index 0000000000..999ca4064e Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce2.hpp.453CC2F7EB0502D0.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx new file mode 100755 index 0000000000..36f8af3325 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.cpp.62DA65F400167AF3.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx new file mode 100755 index 0000000000..9c8754b520 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank3_reduce3.hpp.3F8E078976E83385.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx new file mode 100755 index 0000000000..f588676bd6 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.cpp.22BB990A62BE50C5.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx new file mode 100755 index 0000000000..b01e47c715 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce1.hpp.7F7D38DA481813D7.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx new file mode 100755 index 0000000000..36fcb19c6e Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.cpp.AC6F1FB653149557.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx new file mode 100755 index 0000000000..2b0c203b8b Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce2.hpp.44F668BC92D2FB11.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx new file mode 100755 index 0000000000..3887f06247 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.cpp.2BDC7BD673407442.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx new file mode 100755 index 0000000000..a79cb2efcb Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce3.hpp.1A5543D9F44A4D1E.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx new file mode 100755 index 0000000000..8d68770e0a Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.cpp.E94DE85158A7059C.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx new file mode 100755 index 0000000000..96e72e1a47 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_rank4_reduce4.hpp.DFA0CA7449260515.idx differ diff --git a/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx b/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx new file mode 100755 index 0000000000..e2dfe73499 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f16_f16_instance_type.hpp.D6CB9472D3A0B40A.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx new file mode 100755 index 0000000000..13642983a8 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.cpp.7A9B6FF1C1DE5769.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx new file mode 100755 index 0000000000..599375e3d9 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce1.hpp.901763453949395E.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx new file mode 100755 index 0000000000..320b79c5e3 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.cpp.B1019C659E1C387F.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx new file mode 100755 index 0000000000..737214420a Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce2.hpp.E7E5CB6E00C1ACF7.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx new file mode 100755 index 0000000000..c5f011f2ee Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.cpp.D8971A21BC00D0FF.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx new file mode 100755 index 0000000000..9bd481892b Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank3_reduce3.hpp.5132DD2F1F4E6659.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx new file mode 100755 index 0000000000..0f68683264 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.cpp.70DAADE138E1FD1C.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx new file mode 100755 index 0000000000..0b2c120f27 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce1.hpp.D21E6E14C4987978.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx new file mode 100755 index 0000000000..7b38bfaee3 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.cpp.C8298BB69F4C257A.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx new file mode 100755 index 0000000000..39c9db3eab Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce2.hpp.E0AE02E34E90B0FC.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx new file mode 100755 index 0000000000..8e8ad0775e Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.cpp.25E3FD604E588BD0.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx new file mode 100755 index 0000000000..e295fdfaa3 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce3.hpp.33AFA8695A15063D.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx new file mode 100755 index 0000000000..7e9f9bfd7c Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.cpp.86DA1C4D6D7DC8E7.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx new file mode 100755 index 0000000000..7d193ff703 Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_rank4_reduce4.hpp.35C1B364BF55F9FD.idx differ diff --git a/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx b/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx new file mode 100755 index 0000000000..60f68634bb Binary files /dev/null and b/.cache/clangd/index/device_softmax_f32_f32_instance_type.hpp.7F3C1693216AB251.idx differ diff --git a/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx b/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx new file mode 100755 index 0000000000..1b723e0dd8 Binary files /dev/null and b/.cache/clangd/index/device_softmax_impl.hpp.B9E8182EF8FC3442.idx differ diff --git a/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx b/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx new file mode 100755 index 0000000000..b22cfc111c Binary files /dev/null and b/.cache/clangd/index/device_softmax_instance.hpp.E665F0AD026D2FE2.idx differ diff --git a/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx b/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx new file mode 100755 index 0000000000..90a8972502 Binary files /dev/null and b/.cache/clangd/index/device_sparse_embeddings_forward_layernorm.hpp.469278C9F8399B0C.idx differ diff --git a/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx b/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx new file mode 100755 index 0000000000..498ffaad97 Binary files /dev/null and b/.cache/clangd/index/device_splitk_contraction_multiple_d.hpp.B9F979311F947532.idx differ diff --git a/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx b/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx new file mode 100755 index 0000000000..db05bd26a7 Binary files /dev/null and b/.cache/clangd/index/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp.43569A70DF974D2E.idx differ diff --git a/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx b/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx new file mode 100755 index 0000000000..0d8ef17f06 Binary files /dev/null and b/.cache/clangd/index/device_transpose_instance.hpp.9CF832E322CA1530.idx differ diff --git a/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx b/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx new file mode 100755 index 0000000000..4483a90c04 Binary files /dev/null and b/.cache/clangd/index/device_transpose_instances_3d.cpp.5DC4547776B7C3A0.idx differ diff --git a/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx b/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx new file mode 100755 index 0000000000..c000f5d7f1 Binary files /dev/null and b/.cache/clangd/index/dpp_gemm.hpp.CD386ABA025C1896.idx differ diff --git a/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx b/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx new file mode 100755 index 0000000000..1fa3488651 Binary files /dev/null and b/.cache/clangd/index/dual_reduce_common.hpp.746AA107CF079C11.idx differ diff --git a/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx b/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx new file mode 100755 index 0000000000..40f1298f14 Binary files /dev/null and b/.cache/clangd/index/dual_reduce_multiblock.cpp.C5E150B4E9B1E7E8.idx differ diff --git a/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx b/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx new file mode 100755 index 0000000000..0e3349227d Binary files /dev/null and b/.cache/clangd/index/dual_reduce_threadwise.cpp.7BA56606324C3914.idx differ diff --git a/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx b/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx new file mode 100755 index 0000000000..248bc9f465 Binary files /dev/null and b/.cache/clangd/index/dynamic_buffer.hpp.9B076D16FB6FDAA3.idx differ diff --git a/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx b/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx new file mode 100755 index 0000000000..21141e5245 Binary files /dev/null and b/.cache/clangd/index/element_wise_operation.hpp.0715A40D7E0DC380.idx differ diff --git a/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx b/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx new file mode 100755 index 0000000000..08d7ee996a Binary files /dev/null and b/.cache/clangd/index/elementwise_add_1d.cpp.CD0512697217A13B.idx differ diff --git a/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx b/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx new file mode 100755 index 0000000000..59c76f0ad3 Binary files /dev/null and b/.cache/clangd/index/elementwise_add_4d.cpp.6D6ECABFC0DFD709.idx differ diff --git a/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx b/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx new file mode 100755 index 0000000000..7c44c49b1d Binary files /dev/null and b/.cache/clangd/index/elementwise_layernorm_blockwise.cpp.B1B3F8F277F6314A.idx differ diff --git a/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx b/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx new file mode 100755 index 0000000000..7f03b779ed Binary files /dev/null and b/.cache/clangd/index/elementwise_normalization.hpp.B86CF9747F7CBAA9.idx differ diff --git a/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx b/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx new file mode 100755 index 0000000000..bb27fbc779 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute.cpp.780590C9EF3E30CE.idx differ diff --git a/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx b/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx new file mode 100755 index 0000000000..6c32cf5ca3 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_3d.cpp.2AC2BFBCE3237215.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx b/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx new file mode 100755 index 0000000000..26277d5cfc Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16.cpp.267AD0382A416744.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx new file mode 100755 index 0000000000..9c5e4e7d9c Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_2d.cpp.73F9F8FFA71E623F.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx new file mode 100755 index 0000000000..e489d36c0d Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_col.cpp.427D8FEA072CC243.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx b/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx new file mode 100755 index 0000000000..8641b5b730 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp16_row.cpp.70A9A9EF321166D7.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx b/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx new file mode 100755 index 0000000000..9d05a298dd Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp32_col.cpp.645D32DD67217BF3.idx differ diff --git a/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx b/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx new file mode 100755 index 0000000000..73954ef1c9 Binary files /dev/null and b/.cache/clangd/index/elementwise_permute_4D_fp32_row.cpp.51034D29A216280F.idx differ diff --git a/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx b/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx new file mode 100755 index 0000000000..882fbc2bad Binary files /dev/null and b/.cache/clangd/index/enable_if.hpp.10016CB3379BC2AC.idx differ diff --git a/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx b/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx new file mode 100755 index 0000000000..3210bc0553 Binary files /dev/null and b/.cache/clangd/index/f8_utils.hpp.AB5D9F32110A912A.idx differ diff --git a/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx b/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx new file mode 100755 index 0000000000..140a1fc80d Binary files /dev/null and b/.cache/clangd/index/fill.hpp.A302158AF84698F8.idx differ diff --git a/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx b/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx new file mode 100755 index 0000000000..eec0504ad8 Binary files /dev/null and b/.cache/clangd/index/functional.hpp.AB02FF3D06BB8256.idx differ diff --git a/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx b/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx new file mode 100755 index 0000000000..530c9333b0 Binary files /dev/null and b/.cache/clangd/index/functional2.hpp.4855B705916A8A54.idx differ diff --git a/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx b/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx new file mode 100755 index 0000000000..cce5b119a5 Binary files /dev/null and b/.cache/clangd/index/functional3.hpp.730E80629C119B9D.idx differ diff --git a/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx b/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx new file mode 100755 index 0000000000..471e2ef40d Binary files /dev/null and b/.cache/clangd/index/functional4.hpp.579A0D5EFC68EB03.idx differ diff --git a/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx b/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx new file mode 100755 index 0000000000..d4949b16bf Binary files /dev/null and b/.cache/clangd/index/gemm.hpp.344713A6C512A32C.idx differ diff --git a/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx b/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx new file mode 100755 index 0000000000..95e0d066f1 Binary files /dev/null and b/.cache/clangd/index/gemm.hpp.C45C6ACD321C3642.idx differ diff --git a/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx b/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx new file mode 100755 index 0000000000..92a4be2c7c Binary files /dev/null and b/.cache/clangd/index/gemm_add.hpp.0A05FF4E11D910E0.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx b/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx new file mode 100755 index 0000000000..3cd773bc4a Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu.hpp.C1858AFB6E6189D2.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx new file mode 100755 index 0000000000..1c7a519110 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_bf16.cpp.D697A303BBCD00E1.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx new file mode 100755 index 0000000000..fa5e345e3e Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp16.cpp.BE72E2A90F751444.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx new file mode 100755 index 0000000000..c1d7f6846b Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_fp32.cpp.1D688E059DA5C6D3.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx new file mode 100755 index 0000000000..6ac8ee6e62 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_int8.cpp.7C822BDA45FD8B5C.idx differ diff --git a/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx new file mode 100755 index 0000000000..2193d2333a Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp.DFA0EFAE6F444F2C.idx differ diff --git a/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx b/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx new file mode 100755 index 0000000000..e11eb70ed7 Binary files /dev/null and b/.cache/clangd/index/gemm_add_add_mean_meansquare_xdl_fp16.cpp.784D28B0B387AEB3.idx differ diff --git a/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx b/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx new file mode 100755 index 0000000000..ce078c9e23 Binary files /dev/null and b/.cache/clangd/index/gemm_add_addsquare_xdl_int8.cpp.C81A82E4BF27D67D.idx differ diff --git a/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx b/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx new file mode 100755 index 0000000000..7b588fbed1 Binary files /dev/null and b/.cache/clangd/index/gemm_add_fastgelu.hpp.7F025B27BB774C23.idx differ diff --git a/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx b/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx new file mode 100755 index 0000000000..5f659ccf2b Binary files /dev/null and b/.cache/clangd/index/gemm_add_multiply.hpp.A076E88A74402B66.idx differ diff --git a/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx b/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx new file mode 100755 index 0000000000..7129663e58 Binary files /dev/null and b/.cache/clangd/index/gemm_add_multiply_xdl_fp16.cpp.2190407D709E315C.idx differ diff --git a/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx b/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx new file mode 100755 index 0000000000..2561e64e2b Binary files /dev/null and b/.cache/clangd/index/gemm_add_relu.hpp.2FD450825E8EDD32.idx differ diff --git a/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx b/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx new file mode 100755 index 0000000000..76a4369d51 Binary files /dev/null and b/.cache/clangd/index/gemm_add_relu_add_layernorm.hpp.2503A6C1813DEBB3.idx differ diff --git a/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx b/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx new file mode 100755 index 0000000000..9c6538cc9a Binary files /dev/null and b/.cache/clangd/index/gemm_add_silu.hpp.8120675903700DB9.idx differ diff --git a/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx b/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx new file mode 100755 index 0000000000..1de2a6c84c Binary files /dev/null and b/.cache/clangd/index/gemm_bf16.cpp.B8DB90A206756A9E.idx differ diff --git a/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx b/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx new file mode 100755 index 0000000000..38f07a5b48 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp.ACF8DA35700D8D8F.idx differ diff --git a/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx b/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx new file mode 100755 index 0000000000..becf34f3ae Binary files /dev/null and b/.cache/clangd/index/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp.83BB18F6B66F2289.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx new file mode 100755 index 0000000000..27e0077ede Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp.6BD99D80213AD3B0.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx new file mode 100755 index 0000000000..123bb48038 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp.AC32388E298FFA30.idx differ diff --git a/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx b/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx new file mode 100755 index 0000000000..276fd764f7 Binary files /dev/null and b/.cache/clangd/index/gemm_bias_relu_xdl_fp16.cpp.74D4B1F74A45E199.idx differ diff --git a/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx b/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx new file mode 100755 index 0000000000..ec5340700d Binary files /dev/null and b/.cache/clangd/index/gemm_bias_softmax_gemm_permute.cpp.A515B9EF146522D4.idx differ diff --git a/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx b/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx new file mode 100755 index 0000000000..a174214513 Binary files /dev/null and b/.cache/clangd/index/gemm_bilinear.hpp.092CAB87B366F08F.idx differ diff --git a/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx b/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx new file mode 100755 index 0000000000..4701b88e05 Binary files /dev/null and b/.cache/clangd/index/gemm_bilinear_xdl_fp16.cpp.372A779C3E21DD0E.idx differ diff --git a/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx b/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx new file mode 100755 index 0000000000..e21b070076 Binary files /dev/null and b/.cache/clangd/index/gemm_dpp_fp16.cpp.D49004015318A845.idx differ diff --git a/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx b/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx new file mode 100755 index 0000000000..832c1389ec Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nn_instance.cpp.684811D08F35F1E0.idx differ diff --git a/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx b/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx new file mode 100755 index 0000000000..c989cbd72b Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nn_instance.hpp.295DF91B6CFA2E94.idx differ diff --git a/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx b/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx new file mode 100755 index 0000000000..ccf9f4f0e3 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nt_instance.cpp.CF459381653536CF.idx differ diff --git a/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx b/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx new file mode 100755 index 0000000000..f07b2cbdc5 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_nt_instance.hpp.CF5C609546474B1F.idx differ diff --git a/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx b/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx new file mode 100755 index 0000000000..e5ccebc655 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tn_instance.cpp.A27FD0EC622C1968.idx differ diff --git a/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx b/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx new file mode 100755 index 0000000000..45c477f2b0 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tn_instance.hpp.C57CA23FF5A33CBF.idx differ diff --git a/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx b/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx new file mode 100755 index 0000000000..95bc6d2479 Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tt_instance.cpp.5D254C2DB3D84C02.idx differ diff --git a/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx b/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx new file mode 100755 index 0000000000..8eeedf5c0d Binary files /dev/null and b/.cache/clangd/index/gemm_f16_tt_instance.hpp.A9C0C844390863CE.idx differ diff --git a/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx b/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx new file mode 100755 index 0000000000..14a85bb252 Binary files /dev/null and b/.cache/clangd/index/gemm_fastgelu.hpp.BF1B6FA711DF924E.idx differ diff --git a/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx b/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx new file mode 100755 index 0000000000..5476891508 Binary files /dev/null and b/.cache/clangd/index/gemm_fp16.cpp.3FD2C2DD621D7AD4.idx differ diff --git a/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx b/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx new file mode 100755 index 0000000000..81a2995254 Binary files /dev/null and b/.cache/clangd/index/gemm_fp32.cpp.6736B177964E2D4B.idx differ diff --git a/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx b/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx new file mode 100755 index 0000000000..627c6607c4 Binary files /dev/null and b/.cache/clangd/index/gemm_int8.cpp.C4070EF85C2974E6.idx differ diff --git a/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx b/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx new file mode 100755 index 0000000000..6262963b68 Binary files /dev/null and b/.cache/clangd/index/gemm_layernorm_xdl_naive_fp16.cpp.B168DA32D72A32C6.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx b/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx new file mode 100755 index 0000000000..fc3b58db49 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_bf16.cpp.40A7D230DC3CAD48.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx b/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx new file mode 100755 index 0000000000..8f3e6b1f0e Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_fp16.cpp.F50CF88BE688DE26.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx b/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx new file mode 100755 index 0000000000..205d2749f0 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_fp32.cpp.1AFB0F6127AC986E.idx differ diff --git a/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx b/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx new file mode 100755 index 0000000000..188ac33dd6 Binary files /dev/null and b/.cache/clangd/index/gemm_max_xdl_int8.cpp.A29BA4DC035DE52E.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx new file mode 100755 index 0000000000..9bcaab9426 Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_bf16.cpp.E1D95735FEB3AF62.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx new file mode 100755 index 0000000000..6212ae8b7b Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp16.cpp.80F76169F90BD8DC.idx differ diff --git a/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx new file mode 100755 index 0000000000..d61fd4efb2 Binary files /dev/null and b/.cache/clangd/index/gemm_mean_meansquare_xdl_fp32.cpp.DAB08B69CF521C93.idx differ diff --git a/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx b/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx new file mode 100755 index 0000000000..51b25e7bca Binary files /dev/null and b/.cache/clangd/index/gemm_multi_ABD_xdl_fp16.cpp.5859AE2CEFBAACD5.idx differ diff --git a/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx b/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx new file mode 100755 index 0000000000..16c7745228 Binary files /dev/null and b/.cache/clangd/index/gemm_multiply_add.hpp.6B6D21EE52049C9D.idx differ diff --git a/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx b/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx new file mode 100755 index 0000000000..31cd900bb6 Binary files /dev/null and b/.cache/clangd/index/gemm_quantization_common.hpp.A23B3AA4CB2F3E7A.idx differ diff --git a/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx b/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx new file mode 100755 index 0000000000..fa81f84619 Binary files /dev/null and b/.cache/clangd/index/gemm_reduce_fp16.cpp.EFAED30A7BA2F6AF.idx differ diff --git a/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx b/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx new file mode 100755 index 0000000000..f5bb3522fd Binary files /dev/null and b/.cache/clangd/index/gemm_reduce_xdl_common.hpp.BF32975909DCA917.idx differ diff --git a/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx b/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx new file mode 100755 index 0000000000..4c4edce95d Binary files /dev/null and b/.cache/clangd/index/gemm_specialization.hpp.F3D9282B71057FD0.idx differ diff --git a/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx b/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx new file mode 100755 index 0000000000..ff78611036 Binary files /dev/null and b/.cache/clangd/index/gemm_splitk.hpp.749B3E67AC6B8B9C.idx differ diff --git a/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx b/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx new file mode 100755 index 0000000000..e712de5d3f Binary files /dev/null and b/.cache/clangd/index/gemm_standalone_xdl_fp16.cpp.EE84FA2A4697A71B.idx differ diff --git a/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx b/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx new file mode 100755 index 0000000000..5d2164748a Binary files /dev/null and b/.cache/clangd/index/gemm_streamk.hpp.B73183768811D4AE.idx differ diff --git a/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx b/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx new file mode 100755 index 0000000000..7910701d35 Binary files /dev/null and b/.cache/clangd/index/gemm_util.hpp.1BFB1C62397B2C8C.idx differ diff --git a/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx new file mode 100755 index 0000000000..0eba24c173 Binary files /dev/null and b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.cpp.DC996A5B769C800C.idx differ diff --git a/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx new file mode 100755 index 0000000000..996f827df9 Binary files /dev/null and b/.cache/clangd/index/gemm_wavelet_f16_tn_instance.hpp.6DF980223599227D.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx b/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx new file mode 100755 index 0000000000..cd629716ca Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bf16.cpp.3DCFB3ED2888403A.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx b/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx new file mode 100755 index 0000000000..276a4284f6 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bf16_rtn.cpp.52661E2CDB7295F6.idx differ diff --git a/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx b/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx new file mode 100755 index 0000000000..c44995d421 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_bias_relu_quantization_int8.cpp.FA91569EB26ACB71.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx b/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx new file mode 100755 index 0000000000..45bde02093 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16.cpp.FC8CAA78DACFED36.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx b/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx new file mode 100755 index 0000000000..f911a3a75d Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16_fp8.cpp.44C0E7F4DAEEA470.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx b/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx new file mode 100755 index 0000000000..7dbed18bdd Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp16_v2.cpp.BBCC41C35D9D878E.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx b/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx new file mode 100755 index 0000000000..41318b8d75 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp64.cpp.326BD4F598F31DB0.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx b/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx new file mode 100755 index 0000000000..005908ca90 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp8.cpp.67C1EEC83062DA6B.idx differ diff --git a/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx b/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx new file mode 100755 index 0000000000..c625ea67d2 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_fp8_bf8.cpp.AEAA96B6834BEA56.idx differ diff --git a/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx b/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx new file mode 100755 index 0000000000..98767742f7 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_int8.cpp.636EBC0B4897BF3A.idx differ diff --git a/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx b/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx new file mode 100755 index 0000000000..5ce5f001a6 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp.4306E603B691D828.idx differ diff --git a/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx new file mode 100755 index 0000000000..0f980b1e26 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp16.cpp.7BFA821D80385E72.idx differ diff --git a/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx new file mode 100755 index 0000000000..e5af5fec80 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_lds_direct_load_fp32.cpp.39A0D92907A2D21F.idx differ diff --git a/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx b/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx new file mode 100755 index 0000000000..d8bb10aaf4 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_quantization_int8.cpp.A58C21992E5540BA.idx differ diff --git a/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx b/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx new file mode 100755 index 0000000000..c135648fa4 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_skip_b_lds_fp16.cpp.86DD71A6DF5D193B.idx differ diff --git a/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx b/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx new file mode 100755 index 0000000000..130ac4aba5 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_streamk.cpp.2BC4B9D993728CB3.idx differ diff --git a/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx b/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx new file mode 100755 index 0000000000..a44a41fb78 Binary files /dev/null and b/.cache/clangd/index/gemm_xdl_wavelet_fp16.cpp.5100C57848816EFC.idx differ diff --git a/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx b/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx new file mode 100755 index 0000000000..ce5a6d47eb Binary files /dev/null and b/.cache/clangd/index/generic_memory_space_atomic.hpp.C9D4AE0A6DC5899F.idx differ diff --git a/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx b/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx new file mode 100755 index 0000000000..710a2a623d Binary files /dev/null and b/.cache/clangd/index/get_id.hpp.D8C96396D4A9E3B4.idx differ diff --git a/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx b/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx new file mode 100755 index 0000000000..8a962c7d1b Binary files /dev/null and b/.cache/clangd/index/get_shift.hpp.0A10E058B6B6AEDA.idx differ diff --git a/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx b/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx new file mode 100755 index 0000000000..f3fe57c665 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_multiple_reduction_multiblock.hpp.B962E3920F925D70.idx differ diff --git a/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx b/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx new file mode 100755 index 0000000000..efe8f75f84 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_multiple_reduction_threadwise.hpp.39A048A5A487AC02.idx differ diff --git a/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx b/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx new file mode 100755 index 0000000000..3f580fe9d6 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_reduction_multiblock.hpp.6BD3600B2B4E50BE.idx differ diff --git a/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx b/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx new file mode 100755 index 0000000000..1970566667 Binary files /dev/null and b/.cache/clangd/index/gridwise_2d_reduction_threadwise.hpp.324930ADA73F028A.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx b/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx new file mode 100755 index 0000000000..025c0c8040 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp.E109D2D6AEE43DDD.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx new file mode 100755 index 0000000000..88cb12e635 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp.FE93431287EC85FD.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx new file mode 100755 index 0000000000..e69cb3de04 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp.A946F68C0A5B7C29.idx differ diff --git a/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx b/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx new file mode 100755 index 0000000000..f1f22ea873 Binary files /dev/null and b/.cache/clangd/index/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp.EEF70E721362F483.idx differ diff --git a/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx b/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx new file mode 100755 index 0000000000..c5253527ea Binary files /dev/null and b/.cache/clangd/index/gridwise_batchnorm_backward_blockwise_welford.hpp.8F24BDC37125B404.idx differ diff --git a/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx b/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx new file mode 100755 index 0000000000..7241dba09c Binary files /dev/null and b/.cache/clangd/index/gridwise_batchnorm_forward_blockwise_welford.hpp.640DA951EF640017.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx b/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx new file mode 100755 index 0000000000..cd6bb67c71 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_1d.hpp.AC0144CD925F6905.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx b/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx new file mode 100755 index 0000000000..90272db2e0 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_1d_scale.hpp.CF7CE1061BA1DF52.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx b/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx new file mode 100755 index 0000000000..ea4a848e2d Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_2d.hpp.DCACD2865A1A82CC.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx b/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx new file mode 100755 index 0000000000..bc954cd8f5 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_3d.hpp.4B57404BC7344584.idx differ diff --git a/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx b/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx new file mode 100755 index 0000000000..9f9c5a8132 Binary files /dev/null and b/.cache/clangd/index/gridwise_elementwise_layernorm_welford_variance.hpp.61A857F748242277.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx b/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx new file mode 100755 index 0000000000..bed1f5dbed Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp.6AEBEC03A02FE520.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx b/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx new file mode 100755 index 0000000000..b70aab0568 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_dpp.hpp.E915886DCA1D5AA3.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx b/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx new file mode 100755 index 0000000000..dd3c3c4722 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp.8ED0C5830B90D078.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx new file mode 100755 index 0000000000..e349c8662e Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp.E96B152CEA3EEC31.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx new file mode 100755 index 0000000000..363b7b0660 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp.75601854417950D1.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx new file mode 100755 index 0000000000..4a42614665 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_wmma_cshuffle.hpp.7EFDFD3F43E2C561.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx new file mode 100755 index 0000000000..66230c649c Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle.hpp.7B2A45106BD10E35.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx new file mode 100755 index 0000000000..f7d353415a Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp.8A2E3D6921803C3C.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx new file mode 100755 index 0000000000..48c200a13e Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp.79ABECDBF9253649.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx b/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx new file mode 100755 index 0000000000..76837aa067 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_selector.hpp.4F4E408D810A020A.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx new file mode 100755 index 0000000000..a2b420caa1 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v1.hpp.8B594F4BBFC9415D.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx new file mode 100755 index 0000000000..ff45438c04 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v2.hpp.1C7FE339CB47DFD5.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx new file mode 100755 index 0000000000..43e9e4afa3 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v3.hpp.D2EF7B80D9AE44CD.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx b/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx new file mode 100755 index 0000000000..5e4070436c Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_pipeline_v4_direct_load.hpp.7C3BB633588D635D.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx b/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx new file mode 100755 index 0000000000..a9d1d6782a Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp.76D93CEF02B94AEF.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx b/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx new file mode 100755 index 0000000000..586b61c139 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp.F83027A680F1BF3E.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx b/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx new file mode 100755 index 0000000000..a7c0eeae3a Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_waveletmodel.hpp.C9F027882C9D4F2B.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx b/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx new file mode 100755 index 0000000000..3251656215 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_wmma.hpp.4F8B2CDD1348D704.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx new file mode 100755 index 0000000000..a8f867a42e Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v1.hpp.E796A6FD7BCB2B56.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx new file mode 100755 index 0000000000..beae9ba703 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_cshuffle_v2.hpp.E0B3C0FB7BAA0FB3.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx b/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx new file mode 100755 index 0000000000..c94fd94705 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp.FC9F72A612CD78AC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx b/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx new file mode 100755 index 0000000000..5ca594b41d Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp.647408B24FB9D296.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx b/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx new file mode 100755 index 0000000000..04d190babf Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_bwd_weight.hpp.65AB95ECB27BA1C2.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx b/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx new file mode 100755 index 0000000000..ce7e75651f Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_skip_b_lds_v1.hpp.8CF2D2B713EFE65F.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx b/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx new file mode 100755 index 0000000000..da1c489a14 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp.76232B181B348ACC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx b/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx new file mode 100755 index 0000000000..5c9ba8bb9a Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_streamk.hpp.3A8F1B585329CAFC.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx new file mode 100755 index 0000000000..000b5cded4 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v2r3.hpp.66B1A28D309FB5D4.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx new file mode 100755 index 0000000000..9c73ef86a0 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v2r4r2.hpp.FFFD55E6D49B5037.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx new file mode 100755 index 0000000000..d4c706130d Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r1.hpp.F4E339DE2B052B4F.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx new file mode 100755 index 0000000000..68e81bed95 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r2.hpp.A91ECFDCE099EED9.idx differ diff --git a/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx b/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx new file mode 100755 index 0000000000..acd23df480 Binary files /dev/null and b/.cache/clangd/index/gridwise_gemm_xdlops_v3r3.hpp.E0F0A6CCA772EDE7.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx b/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx new file mode 100755 index 0000000000..d8f7cde403 Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_batchnorm_forward.hpp.4F73C2A5286FBB35.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx b/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx new file mode 100755 index 0000000000..2dde4d948d Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp.C7E55A256926B340.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx b/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx new file mode 100755 index 0000000000..24f967f828 Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_first_half.hpp.25F962D583FA62A6.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx b/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx new file mode 100755 index 0000000000..7ffd94845e Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp.BEA9938289B2ECCF.idx differ diff --git a/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx b/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx new file mode 100755 index 0000000000..63250ba541 Binary files /dev/null and b/.cache/clangd/index/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp.03EA80728C2C5A38.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx b/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx new file mode 100755 index 0000000000..4e368f0358 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_bwd_data.hpp.43B5DE0C3D25BB20.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx b/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx new file mode 100755 index 0000000000..c199529565 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_bwd_gamma_beta.hpp.8028572B7299488D.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx b/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx new file mode 100755 index 0000000000..825ae9834b Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_naive_variance.hpp.9BD4CEF549374198.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx b/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx new file mode 100755 index 0000000000..ca921cfb64 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_selector.hpp.88C73ECDE0BC86D2.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx b/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx new file mode 100755 index 0000000000..7daeb203c7 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_splitk_1st.hpp.6CD10C30DDA5197D.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx b/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx new file mode 100755 index 0000000000..a16d1b9a62 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_splitk_2nd.hpp.FE37C91CA5D8EB00.idx differ diff --git a/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx b/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx new file mode 100755 index 0000000000..b290bf26d1 Binary files /dev/null and b/.cache/clangd/index/gridwise_normalization_welford_variance.hpp.FE77CD9DBE7C3CF0.idx differ diff --git a/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx b/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx new file mode 100755 index 0000000000..92605ba22f Binary files /dev/null and b/.cache/clangd/index/gridwise_permute.hpp.E425D88E25D439BC.idx differ diff --git a/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx b/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx new file mode 100755 index 0000000000..2d2d044e85 Binary files /dev/null and b/.cache/clangd/index/gridwise_put_element_1d.hpp.59E84F53787BFD31.idx differ diff --git a/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx b/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx new file mode 100755 index 0000000000..3d89a493cc Binary files /dev/null and b/.cache/clangd/index/gridwise_set_buffer_value.hpp.1407258F85A006CD.idx differ diff --git a/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx b/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx new file mode 100755 index 0000000000..294da0981f Binary files /dev/null and b/.cache/clangd/index/gridwise_set_multiple_buffer_value.hpp.77B0D514858D3743.idx differ diff --git a/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx b/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx new file mode 100755 index 0000000000..35ae5d9bb8 Binary files /dev/null and b/.cache/clangd/index/gridwise_softmax.hpp.5A728FA36C708E60.idx differ diff --git a/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx b/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx new file mode 100755 index 0000000000..88cb9c2df8 Binary files /dev/null and b/.cache/clangd/index/gridwise_sparse_embeddings_forward_layernorm.hpp.9F9F7C52CFA18397.idx differ diff --git a/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx b/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx new file mode 100755 index 0000000000..1b6a64470b Binary files /dev/null and b/.cache/clangd/index/gridwise_tensor_rearrange.hpp.73B250CAB9F4E6DB.idx differ diff --git a/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx b/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx new file mode 100755 index 0000000000..61f1acebc0 Binary files /dev/null and b/.cache/clangd/index/gridwise_welford_second_half_layernorm2d.hpp.52F6FAA1A972E598.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx b/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx new file mode 100755 index 0000000000..01510a6fe4 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp.2CC919AB5B044CF3.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx b/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx new file mode 100755 index 0000000000..74b14dceb6 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_data_xdl_fp16.cpp.618FF0F87C438165.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx new file mode 100755 index 0000000000..b842af19e7 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_bf16.cpp.3B512EE28C055424.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx new file mode 100755 index 0000000000..4f89411caf Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16.cpp.35241A5239C4258D.idx differ diff --git a/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx new file mode 100755 index 0000000000..27771b24a7 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp.BD38E20DB8B3998F.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx new file mode 100755 index 0000000000..0784ff7125 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_bf16.cpp.2A5915DD526E33D4.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx new file mode 100755 index 0000000000..d48c20886a Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp16.cpp.F0393DD4FF0F531D.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx new file mode 100755 index 0000000000..132e34fee5 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_fp32.cpp.329BE83F917399A6.idx differ diff --git a/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx new file mode 100755 index 0000000000..ff22637418 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_conv_fwd_xdl_int8.cpp.E847303767D0E187.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx new file mode 100755 index 0000000000..4e188e8fab Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp.E2C08E4AF953E7F0.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx new file mode 100755 index 0000000000..a502ab69f2 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp.9CA386C0ABEF35F6.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx new file mode 100755 index 0000000000..b2fcfbf15b Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp.77C435750269A2BB.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx new file mode 100755 index 0000000000..6bc6bead47 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp.8BC00DE6D83E6AA0.idx differ diff --git a/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx b/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx new file mode 100755 index 0000000000..eb93cb3527 Binary files /dev/null and b/.cache/clangd/index/grouped_conv_fwd_xdl_fp16.cpp.F004628471E9028F.idx differ diff --git a/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx b/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx new file mode 100755 index 0000000000..912b03ae24 Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_backward_data.hpp.04038859D7920D2D.idx differ diff --git a/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx b/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx new file mode 100755 index 0000000000..ca22b0b5b0 Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_backward_weight.hpp.CF4258D0B5FB5557.idx differ diff --git a/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx b/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx new file mode 100755 index 0000000000..5126d61e64 Binary files /dev/null and b/.cache/clangd/index/grouped_convolution_forward.hpp.3DF8CF239601D4CC.idx differ diff --git a/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx b/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx new file mode 100755 index 0000000000..fb85a305bc Binary files /dev/null and b/.cache/clangd/index/grouped_gemm.hpp.76EFEC97F28430A5.idx differ diff --git a/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx b/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx new file mode 100755 index 0000000000..94f4af8d88 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_bias_e_permute_xdl_fp16.cpp.D04ED4238574C098.idx differ diff --git a/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx b/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx new file mode 100755 index 0000000000..9b2bfe4404 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_fastgelu.hpp.81337D2196A77665.idx differ diff --git a/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx b/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx new file mode 100755 index 0000000000..706f325710 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_fixed_nk.hpp.92D3931E2C242798.idx differ diff --git a/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx b/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx new file mode 100755 index 0000000000..6b084ae928 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp.E01D0ADA6F0061BF.idx differ diff --git a/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx b/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx new file mode 100755 index 0000000000..21374b5298 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp.9FDD703AE6D2D15E.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx b/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx new file mode 100755 index 0000000000..e057314092 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_bf16.cpp.60909DDCF9BB055C.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx new file mode 100755 index 0000000000..23a6a75d02 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp.3B6907AB640AF24B.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx new file mode 100755 index 0000000000..c29908045b Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp16.cpp.C49639C51E0DF485.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx new file mode 100755 index 0000000000..7ba6a00191 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fixed_nk_fp8.cpp.33128DEF03F0DD92.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx b/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx new file mode 100755 index 0000000000..795190ee06 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fp16.cpp.164525B4D87B1687.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx b/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx new file mode 100755 index 0000000000..18ce02b5d5 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_fp32.cpp.F97F080B232E6F17.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx b/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx new file mode 100755 index 0000000000..86b63ef023 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_int8.cpp.01E8A20D5ECCA353.idx differ diff --git a/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx b/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx new file mode 100755 index 0000000000..24606faa17 Binary files /dev/null and b/.cache/clangd/index/grouped_gemm_xdl_splitk_fp16.cpp.5CCCCCFEB9008060.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx b/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx new file mode 100755 index 0000000000..e329fd2d49 Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_data.hpp.3714A9EE70464716.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx b/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx new file mode 100755 index 0000000000..1308e7be60 Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_fp32.cpp.94FC313C790AEF66.idx differ diff --git a/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx b/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx new file mode 100755 index 0000000000..bb8653dea9 Binary files /dev/null and b/.cache/clangd/index/groupnorm_bwd_gamma_beta.hpp.A4BF65E232956D79.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx b/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx new file mode 100755 index 0000000000..d6f38e1274 Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_sigmoid_mul_fp16.cpp.E1BDC41DAA5F65CC.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx b/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx new file mode 100755 index 0000000000..28672cfe3b Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_splitk_fp16.cpp.5CDEA132226AE994.idx differ diff --git a/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx b/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx new file mode 100755 index 0000000000..0fe62992ff Binary files /dev/null and b/.cache/clangd/index/groupnorm_fwd_swish_fp16.cpp.FDEC9D120BB249C9.idx differ diff --git a/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx b/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx new file mode 100755 index 0000000000..9a8aea2450 Binary files /dev/null and b/.cache/clangd/index/hip_check_error.hpp.B6401497C46CBC5E.idx differ diff --git a/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx b/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx new file mode 100755 index 0000000000..2e65e35f38 Binary files /dev/null and b/.cache/clangd/index/host_common_util.hpp.DD6CA1FC5D70D95B.idx differ diff --git a/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx b/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx new file mode 100755 index 0000000000..a83a8bdeee Binary files /dev/null and b/.cache/clangd/index/host_tensor.cpp.BD60013DE8F91330.idx differ diff --git a/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx b/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx new file mode 100755 index 0000000000..62d3457043 Binary files /dev/null and b/.cache/clangd/index/host_tensor.hpp.A2A2489B4F3A4E06.idx differ diff --git a/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx b/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx new file mode 100755 index 0000000000..b15a31f9af Binary files /dev/null and b/.cache/clangd/index/host_tensor_generator.hpp.44F3F167E8AE0CAB.idx differ diff --git a/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx b/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx new file mode 100755 index 0000000000..15ce55e538 Binary files /dev/null and b/.cache/clangd/index/ignore.hpp.67D37926F4126F70.idx differ diff --git a/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx b/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx new file mode 100755 index 0000000000..e0e3017023 Binary files /dev/null and b/.cache/clangd/index/image_to_column_f32.cpp.E393FC8829D80520.idx differ diff --git a/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx b/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx new file mode 100755 index 0000000000..2dc6ce05b1 Binary files /dev/null and b/.cache/clangd/index/inner_product.hpp.D15AD8CC8FA359B4.idx differ diff --git a/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx b/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx new file mode 100755 index 0000000000..109529eada Binary files /dev/null and b/.cache/clangd/index/inner_product_dpp8.hpp.5615826ED9AC4030.idx differ diff --git a/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx b/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx new file mode 100755 index 0000000000..a78929738a Binary files /dev/null and b/.cache/clangd/index/integral_constant.hpp.A156C83DCD53E7CE.idx differ diff --git a/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx b/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx new file mode 100755 index 0000000000..110000ff43 Binary files /dev/null and b/.cache/clangd/index/io.hpp.27CD569BA11007AA.idx differ diff --git a/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx b/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx new file mode 100755 index 0000000000..32c81bd745 Binary files /dev/null and b/.cache/clangd/index/is_detected.hpp.993129C9FD0FB446.idx differ diff --git a/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx b/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx new file mode 100755 index 0000000000..2b939712fe Binary files /dev/null and b/.cache/clangd/index/is_known_at_compile_time.hpp.C66FF603A6D2FF7E.idx differ diff --git a/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx b/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx new file mode 100755 index 0000000000..a1c2cef697 Binary files /dev/null and b/.cache/clangd/index/iterator.hpp.DDB0074CE39B229F.idx differ diff --git a/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx b/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx new file mode 100755 index 0000000000..1892ce568b Binary files /dev/null and b/.cache/clangd/index/kernel_launch.hpp.4FE4793F82580CE2.idx differ diff --git a/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx b/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx new file mode 100755 index 0000000000..901aa0396e Binary files /dev/null and b/.cache/clangd/index/kernel_utils.hpp.A2D72F83F06BC2A4.idx differ diff --git a/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx b/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx new file mode 100755 index 0000000000..fabaa1ec80 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_add_instance.cpp.5CD53C62DE24ED15.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx new file mode 100755 index 0000000000..64874d79d7 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v1_instance.cpp.2F582983A57414AF.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx new file mode 100755 index 0000000000..6afb5fd2b2 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_instance.cpp.FDC41D8D362237D7.idx differ diff --git a/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx new file mode 100755 index 0000000000..a4604b8d50 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_default_pipeline_v2_opt_instance.cpp.9D0F889F526341B0.idx differ diff --git a/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx b/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx new file mode 100755 index 0000000000..da0a6e8514 Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_interwave_pipeline_v1_instance.cpp.536F114F5BA9349C.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx new file mode 100755 index 0000000000..001ed6cbef Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v1_instance.cpp.5937911B0257B674.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx new file mode 100755 index 0000000000..54f5f1af2c Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_default_pipeline_v2_instance.cpp.0318E39A03EE883F.idx differ diff --git a/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx b/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx new file mode 100755 index 0000000000..cf87f69f3a Binary files /dev/null and b/.cache/clangd/index/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.0A7ACF40DAB7236A.idx differ diff --git a/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx b/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx new file mode 100755 index 0000000000..ea2909a4d6 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_add_instance.cpp.09C78F4C6D71CC59.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx new file mode 100755 index 0000000000..a8e6d09979 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v1_instance.cpp.7011152E4CED8319.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx new file mode 100755 index 0000000000..2bc4f6e6d8 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_instance.cpp.4ADB4E587F42B7E9.idx differ diff --git a/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx new file mode 100755 index 0000000000..266bfcb20b Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_default_pipeline_v2_opt_instance.cpp.AA43A9C5EED3E40A.idx differ diff --git a/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx b/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx new file mode 100755 index 0000000000..883704a192 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_interwave_pipeline_v1_instance.cpp.37A32BC5170A6461.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx new file mode 100755 index 0000000000..6e03792a3c Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v1_instance.cpp.322AFE9D596702FC.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx new file mode 100755 index 0000000000..9b8b3cf3c4 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_default_pipeline_v2_instance.cpp.6202EDA536A25A77.idx differ diff --git a/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx b/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx new file mode 100755 index 0000000000..401eea1ca2 Binary files /dev/null and b/.cache/clangd/index/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.667185EB457E88CA.idx differ diff --git a/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx b/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx new file mode 100755 index 0000000000..fce3b22bd6 Binary files /dev/null and b/.cache/clangd/index/layernorm2d_bwd_fp32.cpp.7066935C4C40FCB0.idx differ diff --git a/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx b/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx new file mode 100755 index 0000000000..91828e7461 Binary files /dev/null and b/.cache/clangd/index/layernorm2d_fwd_fp16.cpp.6079EB647B2C4778.idx differ diff --git a/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx b/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx new file mode 100755 index 0000000000..5c34b78667 Binary files /dev/null and b/.cache/clangd/index/layernorm2d_fwd_splitk_fp16.cpp.7A8CCC9ADABCC2A0.idx differ diff --git a/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx b/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx new file mode 100755 index 0000000000..a9c9e329b7 Binary files /dev/null and b/.cache/clangd/index/layernorm4d_fwd_fp16.cpp.FF678597E26B24C3.idx differ diff --git a/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx b/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx new file mode 100755 index 0000000000..15bd7b2915 Binary files /dev/null and b/.cache/clangd/index/layernorm4d_fwd_splitk_fp16.cpp.BF340D61393FB17F.idx differ diff --git a/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx b/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx new file mode 100755 index 0000000000..570dd29f0b Binary files /dev/null and b/.cache/clangd/index/layernorm_bwd_data.hpp.D81DD3681C23DD68.idx differ diff --git a/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx b/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx new file mode 100755 index 0000000000..a8dc6369b1 Binary files /dev/null and b/.cache/clangd/index/layernorm_bwd_gamma_beta.hpp.8CC35A2FBD2934F9.idx differ diff --git a/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx b/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx new file mode 100755 index 0000000000..aaa21cca05 Binary files /dev/null and b/.cache/clangd/index/layout.hpp.08D624D3F8E5BFE6.idx differ diff --git a/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx b/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx new file mode 100755 index 0000000000..54325797bb Binary files /dev/null and b/.cache/clangd/index/layout_utils.hpp.9A8926719F7B5577.idx differ diff --git a/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx b/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx new file mode 100755 index 0000000000..1446589e3a Binary files /dev/null and b/.cache/clangd/index/literals.hpp.AC03AD34F59B947A.idx differ diff --git a/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx b/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx new file mode 100755 index 0000000000..d08b4b335e Binary files /dev/null and b/.cache/clangd/index/loop_scheduler.hpp.CEBBF3BFF5C82A80.idx differ diff --git a/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx b/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx new file mode 100755 index 0000000000..db07d13b8d Binary files /dev/null and b/.cache/clangd/index/magic_division.hpp.7DE3E22991AC58AE.idx differ diff --git a/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx b/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx new file mode 100755 index 0000000000..47987dc360 Binary files /dev/null and b/.cache/clangd/index/magic_number_division.cpp.853FDB10989ECFC7.idx differ diff --git a/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx b/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx new file mode 100755 index 0000000000..7a7dd984d1 Binary files /dev/null and b/.cache/clangd/index/masking_specialization.hpp.2AE80C7347AB00C4.idx differ diff --git a/.cache/clangd/index/math.hpp.234B356D49034450.idx b/.cache/clangd/index/math.hpp.234B356D49034450.idx new file mode 100755 index 0000000000..65b9a6a974 Binary files /dev/null and b/.cache/clangd/index/math.hpp.234B356D49034450.idx differ diff --git a/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx b/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx new file mode 100755 index 0000000000..badf43a0d8 Binary files /dev/null and b/.cache/clangd/index/math_v2.hpp.562B4AF9F5A86873.idx differ diff --git a/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx b/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx new file mode 100755 index 0000000000..0c57d49403 Binary files /dev/null and b/.cache/clangd/index/matrix_padder.hpp.0539595BAF18EE86.idx differ diff --git a/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx b/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx new file mode 100755 index 0000000000..00b1494d99 Binary files /dev/null and b/.cache/clangd/index/max_pool_bwd.hpp.4F15616E2BF6E596.idx differ diff --git a/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx b/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx new file mode 100755 index 0000000000..08da602c1a Binary files /dev/null and b/.cache/clangd/index/max_pool_bwd_instance_common.hpp.F8135D216BA19663.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx b/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx new file mode 100755 index 0000000000..52057a21a2 Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_bf16.cpp.E133A720E87AC796.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx b/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx new file mode 100755 index 0000000000..6b0173b789 Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_common.hpp.4213849B15309F3C.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx b/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx new file mode 100755 index 0000000000..5a476c72ff Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_fp16.cpp.AA5B517C73C2030C.idx differ diff --git a/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx b/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx new file mode 100755 index 0000000000..b4ac47326d Binary files /dev/null and b/.cache/clangd/index/maxpool2d_bwd_fp32.cpp.3CE2CB1F186065DE.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx b/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx new file mode 100755 index 0000000000..3e05c05848 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_add_instance.cpp.F2B6A1DC94E17C44.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx new file mode 100755 index 0000000000..d328b71dd4 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v1_instance.cpp.B364344C594966A7.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx new file mode 100755 index 0000000000..7a721ab32f Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_instance.cpp.A590A56B0084D88A.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx new file mode 100755 index 0000000000..73c8cc8d98 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_default_pipeline_v2_opt_instance.cpp.7FA7EE3992571786.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx b/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx new file mode 100755 index 0000000000..5e799a3bf3 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_interwave_pipeline_v1_instance.cpp.E84C4EA0690C6426.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx new file mode 100755 index 0000000000..a59a645844 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp.1D02066DCD48924F.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx new file mode 100755 index 0000000000..c97f6357a2 Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp.DFAE93983C514BC6.idx differ diff --git a/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx b/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx new file mode 100755 index 0000000000..74a955f0bd Binary files /dev/null and b/.cache/clangd/index/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp.905D27CAAE2E6FF3.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx b/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx new file mode 100755 index 0000000000..0fe49d8d3b Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_add_instance.cpp.0D22D3DFBDE4264A.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx new file mode 100755 index 0000000000..094a38e903 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v1_instance.cpp.FC2B955D88E3500C.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx new file mode 100755 index 0000000000..c08f7ebfd7 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_instance.cpp.AB0F8B7DC8CEF96B.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx new file mode 100755 index 0000000000..a1b8712efc Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_default_pipeline_v2_opt_instance.cpp.38A404E476FFFA7A.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx b/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx new file mode 100755 index 0000000000..b9ffd37427 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_interwave_pipeline_v1_instance.cpp.1B80E7B3E985B173.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx new file mode 100755 index 0000000000..2b5a67557d Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp.8AF96AFDCF49C20F.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx new file mode 100755 index 0000000000..b11e71dac6 Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp.AD25F4CBEC60DDF2.idx differ diff --git a/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx b/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx new file mode 100755 index 0000000000..b2d7fddb1b Binary files /dev/null and b/.cache/clangd/index/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp.EB9B1319190E914D.idx differ diff --git a/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx b/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx new file mode 100755 index 0000000000..599eeeb37f Binary files /dev/null and b/.cache/clangd/index/multi_index.hpp.46D763C8845699EA.idx differ diff --git a/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx b/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx new file mode 100755 index 0000000000..f23b7750e2 Binary files /dev/null and b/.cache/clangd/index/multi_index_transform.hpp.97E44055DBD0AE25.idx differ diff --git a/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx b/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx new file mode 100755 index 0000000000..e346249d06 Binary files /dev/null and b/.cache/clangd/index/multi_index_transform_helper.hpp.7CC4F9B71E8A7CAA.idx differ diff --git a/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx b/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx new file mode 100755 index 0000000000..b23ae11376 Binary files /dev/null and b/.cache/clangd/index/normalization_bwd_data_instance_common.hpp.3AFBFA5B5298FD17.idx differ diff --git a/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx b/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx new file mode 100755 index 0000000000..e3664d2a8a Binary files /dev/null and b/.cache/clangd/index/normalization_bwd_gamma_beta_instance_common.hpp.84C16E4EE625B892.idx differ diff --git a/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx b/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx new file mode 100755 index 0000000000..e6dac2397a Binary files /dev/null and b/.cache/clangd/index/normalization_fwd.hpp.C0D3E8108CC38C5D.idx differ diff --git a/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx b/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx new file mode 100755 index 0000000000..821cc2330e Binary files /dev/null and b/.cache/clangd/index/normalization_fwd_instance_common.hpp.38A39908E71BFB61.idx differ diff --git a/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx b/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx new file mode 100755 index 0000000000..fa72a7b0af Binary files /dev/null and b/.cache/clangd/index/number.hpp.25A61C6490A403AE.idx differ diff --git a/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx b/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx new file mode 100755 index 0000000000..91e8596204 Binary files /dev/null and b/.cache/clangd/index/numeric.hpp.E1739325ACADC629.idx differ diff --git a/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx b/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx new file mode 100755 index 0000000000..ad3b77bf39 Binary files /dev/null and b/.cache/clangd/index/permute_1xHxW_fp16.cpp.5780304278E72678.idx differ diff --git a/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx b/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx new file mode 100755 index 0000000000..989ea697de Binary files /dev/null and b/.cache/clangd/index/permute_HxWx4_fp16.cpp.FCA2351A6D6E3584.idx differ diff --git a/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx b/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx new file mode 100755 index 0000000000..00d66a9b69 Binary files /dev/null and b/.cache/clangd/index/permute_NxHxW_fp16.cpp.6FE6AEAF71058087.idx differ diff --git a/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx b/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx new file mode 100755 index 0000000000..8d2b410014 Binary files /dev/null and b/.cache/clangd/index/permute_scale.hpp.CFE8A4ABEDAA7993.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx b/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx new file mode 100755 index 0000000000..338bc7330a Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_common.hpp.3724B9CBDE79A373.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx b/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx new file mode 100755 index 0000000000..9d0aff7c6a Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_fp16.cpp.09424A28FF97D19C.idx differ diff --git a/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx b/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx new file mode 100755 index 0000000000..21940e2cbb Binary files /dev/null and b/.cache/clangd/index/pool2d_fwd_fp32.cpp.1AF35A327BA2145F.idx differ diff --git a/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx b/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx new file mode 100755 index 0000000000..e753637eda Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd.hpp.32DAF50D80F54A30.idx differ diff --git a/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx b/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx new file mode 100755 index 0000000000..634cf91401 Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd_common.hpp.EE0EEEB8E82E75A0.idx differ diff --git a/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx b/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx new file mode 100755 index 0000000000..605fb0dd7f Binary files /dev/null and b/.cache/clangd/index/pool3d_fwd_fp16.cpp.176B982A3BF061DE.idx differ diff --git a/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx b/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx new file mode 100755 index 0000000000..d9800f6acd Binary files /dev/null and b/.cache/clangd/index/pool_fwd_instance_common.hpp.4DC931E3BBF2D1BA.idx differ diff --git a/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx b/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx new file mode 100755 index 0000000000..694bf27853 Binary files /dev/null and b/.cache/clangd/index/profile_avg_pool3d_bwd.cpp.4403F8C1766CA1E8.idx differ diff --git a/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx b/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx new file mode 100755 index 0000000000..d47a808cd0 Binary files /dev/null and b/.cache/clangd/index/profile_avg_pool3d_bwd_impl.hpp.C8240F2385534214.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx b/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx new file mode 100755 index 0000000000..eb1b44c88a Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm.cpp.8A238D0EF3B6DB52.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx new file mode 100755 index 0000000000..c6b382c5df Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add.cpp.CEB2ADB0FF2D0A07.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx new file mode 100755 index 0000000000..67b088cdcf Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_add_relu_gemm_add_impl.hpp.CC900C93F3C48179.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx b/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx new file mode 100755 index 0000000000..f862c54a76 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp.F57E6277DAC86E29.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx b/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx new file mode 100755 index 0000000000..4fdbd0130a Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_gemm.cpp.B214B5448CC6B6C2.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx b/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx new file mode 100755 index 0000000000..c96faf96d1 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_gemm_impl.hpp.95E513E815A63274.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx b/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx new file mode 100755 index 0000000000..c68960bb70 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_impl.hpp.65F140203E93B240.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx b/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx new file mode 100755 index 0000000000..0e0d9391a3 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_reduce.cpp.EEA3A1DAF90E2787.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx b/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx new file mode 100755 index 0000000000..e2b1fc5ba3 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_reduce_impl.hpp.5DA2586197AB1178.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx new file mode 100755 index 0000000000..103a6d4bc5 Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_impl.hpp.03AF7B0A8D358951.idx differ diff --git a/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx new file mode 100755 index 0000000000..ec8d4039eb Binary files /dev/null and b/.cache/clangd/index/profile_batched_gemm_softmax_gemm_permute_impl.hpp.14377DE4299D9A14.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx b/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx new file mode 100755 index 0000000000..6bb0850861 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_backward_impl.hpp.30164435038F0711.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx b/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx new file mode 100755 index 0000000000..c8d56c3253 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_bwd.cpp.8CF98960A7C44AE9.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx b/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx new file mode 100755 index 0000000000..bc7f958dd0 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_forward_impl.hpp.07F6CF6EEC2BDC7E.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx b/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx new file mode 100755 index 0000000000..f79468237d Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_fwd.cpp.493E9D388D70238E.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx b/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx new file mode 100755 index 0000000000..4952169b11 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_infer.cpp.69EB815355931680.idx differ diff --git a/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx b/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx new file mode 100755 index 0000000000..fe9e7cf196 Binary files /dev/null and b/.cache/clangd/index/profile_batchnorm_infer_impl.hpp.B4DE6B04DAFD57EF.idx differ diff --git a/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx b/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx new file mode 100755 index 0000000000..a260245b16 Binary files /dev/null and b/.cache/clangd/index/profile_contraction_bilinear.cpp.B7E7F8EC02159FA8.idx differ diff --git a/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx b/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx new file mode 100755 index 0000000000..b96f441f00 Binary files /dev/null and b/.cache/clangd/index/profile_contraction_impl.hpp.E5FF1E81EBC1E7D5.idx differ diff --git a/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx b/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx new file mode 100755 index 0000000000..9b77d58a87 Binary files /dev/null and b/.cache/clangd/index/profile_contraction_scale.cpp.C10C8FEB4E77E97A.idx differ diff --git a/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx b/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx new file mode 100755 index 0000000000..ac4ed498ca Binary files /dev/null and b/.cache/clangd/index/profile_contraction_utils.hpp.E0F0B9314CF06E36.idx differ diff --git a/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx b/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx new file mode 100755 index 0000000000..52dc185d2b Binary files /dev/null and b/.cache/clangd/index/profile_conv_bwd_data.cpp.E30D0E66DFF68BCC.idx differ diff --git a/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx b/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx new file mode 100755 index 0000000000..f9ee2014df Binary files /dev/null and b/.cache/clangd/index/profile_conv_bwd_data_impl.hpp.0CBAC32BD7F22884.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx b/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx new file mode 100755 index 0000000000..a530ec2b19 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd.cpp.7612DDA826C4C111.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx new file mode 100755 index 0000000000..2a3dc843dd Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu.cpp.2AED2F5E417190C4.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx new file mode 100755 index 0000000000..870b378d50 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_add.cpp.064478D11C1695BD.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx new file mode 100755 index 0000000000..3a6079734b Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_add_impl.hpp.660B8209627A1FBE.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx b/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx new file mode 100755 index 0000000000..257cfa256e Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_bias_relu_impl.hpp.0A6D3A36AE3BBBB6.idx differ diff --git a/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx b/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx new file mode 100755 index 0000000000..737b2c4929 Binary files /dev/null and b/.cache/clangd/index/profile_conv_fwd_impl.hpp.DB77593E3FC3FC2B.idx differ diff --git a/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx b/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx new file mode 100755 index 0000000000..e1a3dbb473 Binary files /dev/null and b/.cache/clangd/index/profile_conv_tensor_rearrange.cpp.50518BDD3D0D27F8.idx differ diff --git a/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx b/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx new file mode 100755 index 0000000000..b910d87098 Binary files /dev/null and b/.cache/clangd/index/profile_conv_tensor_rearrange_impl.hpp.ECD3F7A8AA337DB3.idx differ diff --git a/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx b/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx new file mode 100755 index 0000000000..e624bdc5bf Binary files /dev/null and b/.cache/clangd/index/profile_elementwise_layernorm_impl.hpp.228FBBB77B1C2212.idx differ diff --git a/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx b/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx new file mode 100755 index 0000000000..ed68824fd5 Binary files /dev/null and b/.cache/clangd/index/profile_gemm.cpp.1660D1BAA0C70B61.idx differ diff --git a/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx b/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx new file mode 100755 index 0000000000..f6acbea724 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add.cpp.B4A511D84B31E476.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx b/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx new file mode 100755 index 0000000000..8b5d2ff4ac Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_add_fastgelu.cpp.374CFB86EAD905D5.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx b/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx new file mode 100755 index 0000000000..db1d7d541f Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_add_fastgelu_impl.hpp.7165E9E08D31883A.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx b/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx new file mode 100755 index 0000000000..05b280636d Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_fastgelu.cpp.CA0713C21D4F7C04.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx b/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx new file mode 100755 index 0000000000..d8715452f9 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_fastgelu_impl.hpp.23CA5590877F722E.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx b/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx new file mode 100755 index 0000000000..e1c6261396 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_impl.hpp.A44323833CE8C5AD.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx b/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx new file mode 100755 index 0000000000..23c1724f1e Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_multiply.cpp.136C377C7D5B0E54.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx b/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx new file mode 100755 index 0000000000..7f3d50595e Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_multiply_impl.hpp.FBBEFEDC4A78D751.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx b/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx new file mode 100755 index 0000000000..3ee8f4d173 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu.cpp.7B9D4CAF29075E5F.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx new file mode 100755 index 0000000000..1fdd81d44c Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm.cpp.E30A7F678D4C6FAC.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx new file mode 100755 index 0000000000..5af399f968 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_add_layernorm_impl.hpp.5DE61D830E8EC4CC.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx b/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx new file mode 100755 index 0000000000..a6b73a4747 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_relu_impl.hpp.9153E795377986F0.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx b/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx new file mode 100755 index 0000000000..1a064c8771 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_silu.cpp.67ABB3D17F5C92B4.idx differ diff --git a/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx b/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx new file mode 100755 index 0000000000..9db8161a1f Binary files /dev/null and b/.cache/clangd/index/profile_gemm_add_silu_impl.hpp.CC18A7C19DDCC841.idx differ diff --git a/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx b/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx new file mode 100755 index 0000000000..07ee4db7a2 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bias_add_reduce.cpp.CF67B6B3D02F20A5.idx differ diff --git a/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx b/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx new file mode 100755 index 0000000000..7d54811a97 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bias_add_reduce_impl.hpp.CC4023F2075FFC72.idx differ diff --git a/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx b/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx new file mode 100755 index 0000000000..fb5a07c889 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bilinear.cpp.2A04608A0019808D.idx differ diff --git a/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx b/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx new file mode 100755 index 0000000000..1a7f7f3867 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_bilinear_impl.hpp.3D87C8E5D761FA48.idx differ diff --git a/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx b/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx new file mode 100755 index 0000000000..9e8b5dfc8d Binary files /dev/null and b/.cache/clangd/index/profile_gemm_fastgelu.cpp.2EC621FE126AC880.idx differ diff --git a/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx b/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx new file mode 100755 index 0000000000..471324fd2b Binary files /dev/null and b/.cache/clangd/index/profile_gemm_fastgelu_impl.hpp.090E8EEC37D7278D.idx differ diff --git a/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx b/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx new file mode 100755 index 0000000000..a1e8f5bd48 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_impl.hpp.DB85F2B55BD1CC04.idx differ diff --git a/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx b/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx new file mode 100755 index 0000000000..846158aba4 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_multiply_add.cpp.0030E28460A11C1F.idx differ diff --git a/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx b/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx new file mode 100755 index 0000000000..98ef2a1f0c Binary files /dev/null and b/.cache/clangd/index/profile_gemm_multiply_add_impl.hpp.A03C0417F818F4FD.idx differ diff --git a/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx b/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx new file mode 100755 index 0000000000..0ff1b9016e Binary files /dev/null and b/.cache/clangd/index/profile_gemm_reduce.cpp.0A54AE6B66160321.idx differ diff --git a/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx b/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx new file mode 100755 index 0000000000..82c975a3f5 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_reduce_impl.hpp.FE75615CA769411C.idx differ diff --git a/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx b/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx new file mode 100755 index 0000000000..b4f951d643 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_splitk.cpp.6BF00CBB81DE4660.idx differ diff --git a/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx b/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx new file mode 100755 index 0000000000..8b7c385611 Binary files /dev/null and b/.cache/clangd/index/profile_gemm_splitk_impl.hpp.1DC09EFBBC454E8F.idx differ diff --git a/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx b/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx new file mode 100755 index 0000000000..3931f5448c Binary files /dev/null and b/.cache/clangd/index/profile_gemm_streamk.cpp.46F4E02E0912E672.idx differ diff --git a/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx b/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx new file mode 100755 index 0000000000..bd495584df Binary files /dev/null and b/.cache/clangd/index/profile_gemm_streamk_impl.hpp.73FA01C66CC8D3E3.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx b/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx new file mode 100755 index 0000000000..7768d9861d Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_data.cpp.FA67D9F6149A96BE.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx b/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx new file mode 100755 index 0000000000..399286bf2d Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_data_impl.hpp.59E9CBB730B20A37.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx b/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx new file mode 100755 index 0000000000..3dd933d953 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_weight.cpp.34186A38546F70D1.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx b/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx new file mode 100755 index 0000000000..6856e1d792 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_bwd_weight_impl.hpp.A8CF2351C95F34F5.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx b/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx new file mode 100755 index 0000000000..706e95ee71 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_fwd.cpp.8D8BCE0560F621C4.idx differ diff --git a/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx b/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx new file mode 100755 index 0000000000..27e3e0c647 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_conv_fwd_impl.hpp.90847DB78EF4B739.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx b/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx new file mode 100755 index 0000000000..65fb0e7c5f Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm.cpp.7EE7588374346C13.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx b/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx new file mode 100755 index 0000000000..5b3e8c346c Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fastgelu.cpp.AC58B27586C8FEAD.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx b/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx new file mode 100755 index 0000000000..10c9f63e46 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fastgelu_impl.hpp.DE8318589739E396.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx b/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx new file mode 100755 index 0000000000..3a492754b7 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fixed_nk.cpp.12EAC200C0F64A96.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx b/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx new file mode 100755 index 0000000000..737982474d Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_fixed_nk_impl.hpp.B583CC73B5188079.idx differ diff --git a/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx b/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx new file mode 100755 index 0000000000..d4c9fd9671 Binary files /dev/null and b/.cache/clangd/index/profile_grouped_gemm_impl.hpp.0459F4415131A8AB.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx b/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx new file mode 100755 index 0000000000..eee5a691cb Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_data.cpp.B5C8D8F20885019C.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx b/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx new file mode 100755 index 0000000000..3ebb81cb4c Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_data_impl.hpp.05BE2AB219AD2046.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx new file mode 100755 index 0000000000..5080ed9b10 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta.cpp.0D9708552C2EDA92.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx new file mode 100755 index 0000000000..2ff61fc6bc Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_bwd_gamma_beta_impl.hpp.F912A6BA3766E9B6.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx b/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx new file mode 100755 index 0000000000..7156c186bd Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_fwd.cpp.B71B17B33AF365C7.idx differ diff --git a/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx b/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx new file mode 100755 index 0000000000..275bb734c7 Binary files /dev/null and b/.cache/clangd/index/profile_groupnorm_fwd_impl.hpp.DD036B9EC45A3789.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx b/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx new file mode 100755 index 0000000000..5eeac590a6 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_data.cpp.EDB022306AF7F904.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx b/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx new file mode 100755 index 0000000000..95428076a8 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_data_impl.hpp.CF00FB7A122475B2.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx new file mode 100755 index 0000000000..c4990ca5cc Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta.cpp.58ACE0ADA4571F63.idx differ diff --git a/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx new file mode 100755 index 0000000000..4bffceb08e Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_bwd_gamma_beta_impl.hpp.672AE63374836A4E.idx differ diff --git a/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx b/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx new file mode 100755 index 0000000000..007049bc50 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_fwd.cpp.FA9A3EA5A9ABAF4C.idx differ diff --git a/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx b/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx new file mode 100755 index 0000000000..71db8cc202 Binary files /dev/null and b/.cache/clangd/index/profile_layernorm_fwd_impl.hpp.BF1975AA887FB0A6.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx b/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx new file mode 100755 index 0000000000..2c523db257 Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_bwd.cpp.7793240AA812E755.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx b/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx new file mode 100755 index 0000000000..2a63bf051e Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_bwd_impl.hpp.885D2D1066A38A2E.idx differ diff --git a/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx b/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx new file mode 100755 index 0000000000..3ac6f2a577 Binary files /dev/null and b/.cache/clangd/index/profile_max_pool3d_fwd.cpp.11655872CD06FCBE.idx differ diff --git a/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx b/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx new file mode 100755 index 0000000000..73d2b9eae7 Binary files /dev/null and b/.cache/clangd/index/profile_permute_scale.cpp.A23434A32C62DC3B.idx differ diff --git a/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx b/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx new file mode 100755 index 0000000000..2baeed23f6 Binary files /dev/null and b/.cache/clangd/index/profile_permute_scale_impl.hpp.9398D48C7353608C.idx differ diff --git a/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx b/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx new file mode 100755 index 0000000000..1870c6cd48 Binary files /dev/null and b/.cache/clangd/index/profile_pool3d_fwd_impl.hpp.4E7051C23FCACC74.idx differ diff --git a/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx b/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx new file mode 100755 index 0000000000..c961d17fab Binary files /dev/null and b/.cache/clangd/index/profile_reduce.cpp.7CFAAB4D3A545696.idx differ diff --git a/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx b/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx new file mode 100755 index 0000000000..494b10d5b6 Binary files /dev/null and b/.cache/clangd/index/profile_reduce_impl.hpp.17B63EE6A3A6D55C.idx differ diff --git a/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx b/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx new file mode 100755 index 0000000000..34318f6439 Binary files /dev/null and b/.cache/clangd/index/profile_softmax.cpp.31A9AF77C4279825.idx differ diff --git a/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx b/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx new file mode 100755 index 0000000000..8e742d8fc3 Binary files /dev/null and b/.cache/clangd/index/profile_softmax_impl.hpp.28DC8A49A2D3559F.idx differ diff --git a/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx b/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx new file mode 100755 index 0000000000..4e3cd18a8b Binary files /dev/null and b/.cache/clangd/index/profile_transpose.cpp.3798E55CF7D973A7.idx differ diff --git a/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx b/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx new file mode 100755 index 0000000000..556406af58 Binary files /dev/null and b/.cache/clangd/index/profile_transpose_impl.hpp.8CE368E3C650AFE1.idx differ diff --git a/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx b/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx new file mode 100755 index 0000000000..735e09e666 Binary files /dev/null and b/.cache/clangd/index/profiler.cpp.A1998E185346D702.idx differ diff --git a/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx b/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx new file mode 100755 index 0000000000..e2a512c4c4 Binary files /dev/null and b/.cache/clangd/index/profiler_operation_registry.hpp.C3738D3851333A5F.idx differ diff --git a/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx b/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx new file mode 100755 index 0000000000..46d3175f8a Binary files /dev/null and b/.cache/clangd/index/put_element_fp16.cpp.4CB3E91BA1C3425F.idx differ diff --git a/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx b/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx new file mode 100755 index 0000000000..f61c5b0a61 Binary files /dev/null and b/.cache/clangd/index/quantization_operation.hpp.BD94143B871C7D8C.idx differ diff --git a/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx b/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx new file mode 100755 index 0000000000..217b12d2b9 Binary files /dev/null and b/.cache/clangd/index/random_gen.hpp.B234A29FCA71839D.idx differ diff --git a/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx b/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx new file mode 100755 index 0000000000..737d3ebf8d Binary files /dev/null and b/.cache/clangd/index/ranges.hpp.66B5D2199BD23CD4.idx differ diff --git a/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx b/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx new file mode 100755 index 0000000000..4442961413 Binary files /dev/null and b/.cache/clangd/index/reduce.hpp.7DC5A811A95F14CB.idx differ diff --git a/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx b/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx new file mode 100755 index 0000000000..cf0ddca2e4 Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise.cpp.E231BF6C564BCCFD.idx differ diff --git a/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx b/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx new file mode 100755 index 0000000000..080cb5f56f Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise_impl.hpp.B38B56E3F9BFC70D.idx differ diff --git a/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx b/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx new file mode 100755 index 0000000000..22bf457f3c Binary files /dev/null and b/.cache/clangd/index/reduce_blockwise_two_call.cpp.C62D3A32F608E374.idx differ diff --git a/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx b/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx new file mode 100755 index 0000000000..1c59e58464 Binary files /dev/null and b/.cache/clangd/index/reduce_example_common.hpp.A3D8D5EB42288D30.idx differ diff --git a/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx b/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx new file mode 100755 index 0000000000..87d20412c8 Binary files /dev/null and b/.cache/clangd/index/reduce_multiblock_atomic_add.cpp.22DBB7B98E8ABAE4.idx differ diff --git a/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx b/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx new file mode 100755 index 0000000000..89b1e0fb34 Binary files /dev/null and b/.cache/clangd/index/reduce_multiblock_atomic_add_impl.hpp.014F022BC601B7F9.idx differ diff --git a/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx b/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx new file mode 100755 index 0000000000..173881a446 Binary files /dev/null and b/.cache/clangd/index/reduce_no_index.cpp.3EDCFAA9D727E5B6.idx differ diff --git a/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx b/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx new file mode 100755 index 0000000000..a165e1ee8e Binary files /dev/null and b/.cache/clangd/index/reduce_with_index.cpp.F84EAB2F5A59285D.idx differ diff --git a/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx b/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx new file mode 100755 index 0000000000..dd6f8bfcdf Binary files /dev/null and b/.cache/clangd/index/reduction_common.hpp.C00F2970D4BF7FA3.idx differ diff --git a/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx b/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx new file mode 100755 index 0000000000..aa47ab062f Binary files /dev/null and b/.cache/clangd/index/reduction_enums.hpp.871AE7D90DC9E069.idx differ diff --git a/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx b/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx new file mode 100755 index 0000000000..6375331dde Binary files /dev/null and b/.cache/clangd/index/reduction_functions_accumulate.hpp.7D2C2CFC245627F0.idx differ diff --git a/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx b/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx new file mode 100755 index 0000000000..3ce2b703e9 Binary files /dev/null and b/.cache/clangd/index/reduction_functions_blockwise.hpp.C958E7F57DF3365E.idx differ diff --git a/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx b/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx new file mode 100755 index 0000000000..997f46ae53 Binary files /dev/null and b/.cache/clangd/index/reduction_functions_threadwise.hpp.7A06A2856650B185.idx differ diff --git a/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx b/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx new file mode 100755 index 0000000000..fc9e0fe29b Binary files /dev/null and b/.cache/clangd/index/reduction_operator.hpp.28F266F9F2F32A7B.idx differ diff --git a/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx b/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx new file mode 100755 index 0000000000..0845ac4101 Binary files /dev/null and b/.cache/clangd/index/reduction_operator_mapping.hpp.8A11EE4B065A6CAE.idx differ diff --git a/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx b/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx new file mode 100755 index 0000000000..4b0c748f9e Binary files /dev/null and b/.cache/clangd/index/reference_avgpool_bwd.hpp.EF9EF399DB15462F.idx differ diff --git a/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx b/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx new file mode 100755 index 0000000000..fc7a61431d Binary files /dev/null and b/.cache/clangd/index/reference_batched_gemm.hpp.AF38237B103D733E.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx b/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx new file mode 100755 index 0000000000..0e02d9a8a0 Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_backward.hpp.DB836B16A0EC29D6.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx b/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx new file mode 100755 index 0000000000..9959d7ff32 Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_forward.hpp.0421D3C13A4F2108.idx differ diff --git a/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx b/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx new file mode 100755 index 0000000000..0f08238702 Binary files /dev/null and b/.cache/clangd/index/reference_batchnorm_infer.hpp.B110A1DBC8883C05.idx differ diff --git a/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx b/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx new file mode 100755 index 0000000000..3cbf35ab10 Binary files /dev/null and b/.cache/clangd/index/reference_cgemm.hpp.9B0EE32433381722.idx differ diff --git a/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx b/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx new file mode 100755 index 0000000000..1335c220bf Binary files /dev/null and b/.cache/clangd/index/reference_column_to_image.hpp.B1B86724AE64837E.idx differ diff --git a/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx b/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx new file mode 100755 index 0000000000..19acb98c63 Binary files /dev/null and b/.cache/clangd/index/reference_contraction.hpp.4808E834C3E32AEB.idx differ diff --git a/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx b/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx new file mode 100755 index 0000000000..e902493090 Binary files /dev/null and b/.cache/clangd/index/reference_conv_bwd_data.hpp.7B56299251FBE761.idx differ diff --git a/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx b/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx new file mode 100755 index 0000000000..77a1863199 Binary files /dev/null and b/.cache/clangd/index/reference_conv_bwd_weight.hpp.474E92061BE55B30.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx b/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx new file mode 100755 index 0000000000..9d1db7cf73 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd.cpp.3703F6A90F557994.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx b/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx new file mode 100755 index 0000000000..b3c0741b52 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd.hpp.4266D54C9E6D337C.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx b/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx new file mode 100755 index 0000000000..97991a850b Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd_bias_activation.hpp.CC5C1A67067C17BD.idx differ diff --git a/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx b/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx new file mode 100755 index 0000000000..a8d304c8b3 Binary files /dev/null and b/.cache/clangd/index/reference_conv_fwd_bias_activation_add.hpp.05EAEEBBFD920597.idx differ diff --git a/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx b/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx new file mode 100755 index 0000000000..9d3d50d607 Binary files /dev/null and b/.cache/clangd/index/reference_gemm.hpp.0A74F0F3AFBAC0AB.idx differ diff --git a/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx b/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx new file mode 100755 index 0000000000..9e7ec451dd Binary files /dev/null and b/.cache/clangd/index/reference_gemm_layernorm.hpp.9AA4C354D9D24BEC.idx differ diff --git a/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx b/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx new file mode 100755 index 0000000000..6fcc433bc0 Binary files /dev/null and b/.cache/clangd/index/reference_groupnorm.hpp.368D3290BA70398F.idx differ diff --git a/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx b/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx new file mode 100755 index 0000000000..6720ddc841 Binary files /dev/null and b/.cache/clangd/index/reference_groupnorm_bwd.hpp.C24505C3C5BFB298.idx differ diff --git a/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx b/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx new file mode 100755 index 0000000000..6b59f0ac40 Binary files /dev/null and b/.cache/clangd/index/reference_image_to_column.hpp.14F89CCD7031E508.idx differ diff --git a/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx b/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx new file mode 100755 index 0000000000..f3f3e04ba3 Binary files /dev/null and b/.cache/clangd/index/reference_layernorm.hpp.B9C72CF73E810635.idx differ diff --git a/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx b/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx new file mode 100755 index 0000000000..bc1371b4b6 Binary files /dev/null and b/.cache/clangd/index/reference_layernorm_bwd.hpp.F0026FDE37931C67.idx differ diff --git a/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx b/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx new file mode 100755 index 0000000000..77b6d6339a Binary files /dev/null and b/.cache/clangd/index/reference_maxpool_bwd.hpp.AA4F2921ED8F7A67.idx differ diff --git a/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx b/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx new file mode 100755 index 0000000000..2edc966129 Binary files /dev/null and b/.cache/clangd/index/reference_pool_fwd.hpp.3BB9B8E57191E80A.idx differ diff --git a/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx b/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx new file mode 100755 index 0000000000..b25d5edb8f Binary files /dev/null and b/.cache/clangd/index/reference_reduce.hpp.0C04C3B5F65B2DFD.idx differ diff --git a/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx b/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx new file mode 100755 index 0000000000..a7ff00d725 Binary files /dev/null and b/.cache/clangd/index/reference_softmax.hpp.4C7847F5A451A485.idx differ diff --git a/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx b/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx new file mode 100755 index 0000000000..8bbb9e4323 Binary files /dev/null and b/.cache/clangd/index/reference_sparse_embedding3_forward_layernorm.hpp.495E2A0ED186B796.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx b/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx new file mode 100755 index 0000000000..d5c74b0376 Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_example.inc.341E18570766CD6C.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx b/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx new file mode 100755 index 0000000000..904c3f9388 Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_gemm_example.inc.B0C303C33F007446.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx new file mode 100755 index 0000000000..24757b5708 Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm.inc.35534F29FB76E4C9.idx differ diff --git a/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx new file mode 100755 index 0000000000..58c5a11015 Binary files /dev/null and b/.cache/clangd/index/run_batched_gemm_scale_softmax_gemm_permute.inc.DF3E33E757137FA7.idx differ diff --git a/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx b/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx new file mode 100755 index 0000000000..30da03eb6e Binary files /dev/null and b/.cache/clangd/index/run_contraction_bilinear_example.inc.427C55016011CF17.idx differ diff --git a/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx b/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx new file mode 100755 index 0000000000..cd3f3bec00 Binary files /dev/null and b/.cache/clangd/index/run_contraction_scale_example.inc.619AD6EEB2674ADA.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx b/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx new file mode 100755 index 0000000000..38c38a3645 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_bias_perchannel_quantization_example.inc.453F2B26B4A84A7C.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx b/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx new file mode 100755 index 0000000000..3805dff001 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_bias_perlayer_quantization_example.inc.41E3948246A8DC23.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx b/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx new file mode 100755 index 0000000000..9a5ce3c3d5 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_perchannel_quantization_example.inc.4B57FDA755059955.idx differ diff --git a/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx b/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx new file mode 100755 index 0000000000..55a3255556 Binary files /dev/null and b/.cache/clangd/index/run_conv2d_fwd_perlayer_quantization_example.inc.3D388FA1A8D19B72.idx differ diff --git a/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx b/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx new file mode 100755 index 0000000000..2c90c4a8a9 Binary files /dev/null and b/.cache/clangd/index/run_convnd_activ_example.inc.F2FB9ED6F9BA4B8A.idx differ diff --git a/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx b/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx new file mode 100755 index 0000000000..6c2e11a0c4 Binary files /dev/null and b/.cache/clangd/index/run_convnd_fwd_example.inc.6C6FF11BF5A73D93.idx differ diff --git a/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx b/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx new file mode 100755 index 0000000000..18b6d71166 Binary files /dev/null and b/.cache/clangd/index/run_convnd_fwd_max_example.inc.4C55B2EACEAC1C07.idx differ diff --git a/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx b/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx new file mode 100755 index 0000000000..b762d57779 Binary files /dev/null and b/.cache/clangd/index/run_gemm_add_add_fastgelu_example.inc.E8AFCCD92C8D9967.idx differ diff --git a/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx b/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx new file mode 100755 index 0000000000..93c15d941c Binary files /dev/null and b/.cache/clangd/index/run_gemm_add_multiply_example.inc.3C87D5BB1B980F33.idx differ diff --git a/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx b/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx new file mode 100755 index 0000000000..96da48915b Binary files /dev/null and b/.cache/clangd/index/run_gemm_example.inc.4868A822F4677354.idx differ diff --git a/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx b/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx new file mode 100755 index 0000000000..fc965a4f57 Binary files /dev/null and b/.cache/clangd/index/run_gemm_test.inc.B7894CAD83BCCCF5.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx b/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx new file mode 100755 index 0000000000..ae56482ff0 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_data_bias_relu_example.inc.1D7F06778A26066D.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx b/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx new file mode 100755 index 0000000000..1c6903acd6 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_data_example.inc.11E97FA73102DBD5.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx b/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx new file mode 100755 index 0000000000..f62070f5bc Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_bwd_weight_example.inc.EE2E4EE0E60E4D81.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx b/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx new file mode 100755 index 0000000000..4a425ad628 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_conv_fwd_example.inc.88B5CE7F22532219.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx b/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx new file mode 100755 index 0000000000..15f5e058e0 Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_fwd_bias_relu_add_example.inc.7B0A8630D499DF28.idx differ diff --git a/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx b/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx new file mode 100755 index 0000000000..c17461c5ad Binary files /dev/null and b/.cache/clangd/index/run_grouped_conv_fwd_example.inc.C65A743D8AABBB7C.idx differ diff --git a/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx b/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx new file mode 100755 index 0000000000..5782d15fad Binary files /dev/null and b/.cache/clangd/index/run_grouped_gemm_example.inc.FE653C75BFA23618.idx differ diff --git a/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx b/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx new file mode 100755 index 0000000000..5b78a1f6b9 Binary files /dev/null and b/.cache/clangd/index/run_grouped_gemm_scale_softmax_gemm_permute.inc.0CAE0C43C07E96CE.idx differ diff --git a/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx b/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx new file mode 100755 index 0000000000..f885b602dd Binary files /dev/null and b/.cache/clangd/index/run_groupnorm_fwd_example.inc.85BD21F884D2FB4A.idx differ diff --git a/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx b/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx new file mode 100755 index 0000000000..c6ce91b106 Binary files /dev/null and b/.cache/clangd/index/run_layernorm4d_fwd_example.inc.8870D57CA6553449.idx differ diff --git a/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx b/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx new file mode 100755 index 0000000000..93845507d1 Binary files /dev/null and b/.cache/clangd/index/run_layernorm_example.inc.DA0460DDD4EF632F.idx differ diff --git a/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx b/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx new file mode 100755 index 0000000000..4a84c04a1c Binary files /dev/null and b/.cache/clangd/index/run_permute_bundle_example.inc.419BC377BBAE0EEA.idx differ diff --git a/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx b/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx new file mode 100755 index 0000000000..94fa1311cf Binary files /dev/null and b/.cache/clangd/index/run_permute_element_example.inc.42CC4C3518B6F771.idx differ diff --git a/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx b/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx new file mode 100755 index 0000000000..1ddffcde20 Binary files /dev/null and b/.cache/clangd/index/run_splitK_gemm_example.inc.51FAC96E5EFEA0F0.idx differ diff --git a/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx b/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx new file mode 100755 index 0000000000..d23d93d8d3 Binary files /dev/null and b/.cache/clangd/index/sequence.hpp.25AC5091073FF3EB.idx differ diff --git a/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx b/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx new file mode 100755 index 0000000000..08824b8fba Binary files /dev/null and b/.cache/clangd/index/sequence_helper.hpp.2282D53015E164C0.idx differ diff --git a/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx b/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx new file mode 100755 index 0000000000..ff5a874c69 Binary files /dev/null and b/.cache/clangd/index/softmax.hpp.2F97791F40A73FC9.idx differ diff --git a/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx b/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx new file mode 100755 index 0000000000..4653326c54 Binary files /dev/null and b/.cache/clangd/index/softmax_blockwise.cpp.3EE65033A8858975.idx differ diff --git a/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx b/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx new file mode 100755 index 0000000000..84f616a5d3 Binary files /dev/null and b/.cache/clangd/index/space_filling_curve.cpp.EB322F6A66137D22.idx differ diff --git a/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx b/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx new file mode 100755 index 0000000000..a749407307 Binary files /dev/null and b/.cache/clangd/index/span.hpp.D6316D1D55F0830C.idx differ diff --git a/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx b/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx new file mode 100755 index 0000000000..63d2b0df56 Binary files /dev/null and b/.cache/clangd/index/sparse_embedding3_forward_layernorm.cpp.CF5214A930A81517.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx b/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx new file mode 100755 index 0000000000..9c28e906b8 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_bf16.cpp.AB98F65366A9F1A9.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx b/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx new file mode 100755 index 0000000000..d41899de2b Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp16.cpp.DBCD688A2F933855.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx b/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx new file mode 100755 index 0000000000..3506d242fd Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp16_fp8.cpp.60034E8F6043ABD9.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx b/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx new file mode 100755 index 0000000000..6bfd4425b3 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_fp32.cpp.A3B8F8ECA755E6B2.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx b/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx new file mode 100755 index 0000000000..dcba2b253b Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_int8.cpp.4600C416992C0160.idx differ diff --git a/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx b/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx new file mode 100755 index 0000000000..2b8f7f1545 Binary files /dev/null and b/.cache/clangd/index/splitK_gemm_xdl_lds_direct_load_fp16.cpp.2D633E2EB00CCD78.idx differ diff --git a/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx new file mode 100755 index 0000000000..d18dbd1b76 Binary files /dev/null and b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp16.cpp.3597E35811A1130C.idx differ diff --git a/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx new file mode 100755 index 0000000000..79ce2caa39 Binary files /dev/null and b/.cache/clangd/index/splitk_gemm_bias_e_permute_xdl_fp32.cpp.8A4C951A87984080.idx differ diff --git a/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx b/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx new file mode 100755 index 0000000000..679748e6a5 Binary files /dev/null and b/.cache/clangd/index/static_buffer.hpp.E059C0D43CFA4618.idx differ diff --git a/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx b/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx new file mode 100755 index 0000000000..27ada29894 Binary files /dev/null and b/.cache/clangd/index/static_tensor.hpp.55059299885EB585.idx differ diff --git a/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx b/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx new file mode 100755 index 0000000000..4e71ab4ae2 Binary files /dev/null and b/.cache/clangd/index/statically_indexed_array.hpp.8A80A7A4CC47B954.idx differ diff --git a/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx b/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx new file mode 100755 index 0000000000..b938c54604 Binary files /dev/null and b/.cache/clangd/index/statically_indexed_array_multi_index.hpp.E363636635FF265A.idx differ diff --git a/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx b/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx new file mode 100755 index 0000000000..00718ad8b7 Binary files /dev/null and b/.cache/clangd/index/stream_config.hpp.015AF40CD82DD077.idx differ diff --git a/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx b/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx new file mode 100755 index 0000000000..e44387cf8f Binary files /dev/null and b/.cache/clangd/index/stream_utility.hpp.7DD00DAAC010212E.idx differ diff --git a/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx b/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx new file mode 100755 index 0000000000..978a463d32 Binary files /dev/null and b/.cache/clangd/index/synchronization.hpp.94B924A949B60CF0.idx differ diff --git a/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx b/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx new file mode 100755 index 0000000000..06c01cec00 Binary files /dev/null and b/.cache/clangd/index/tensor.hpp.C84FDD70C53B91FD.idx differ diff --git a/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx b/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx new file mode 100755 index 0000000000..78e734459e Binary files /dev/null and b/.cache/clangd/index/tensor_adaptor.hpp.1AABBD498E3DDABA.idx differ diff --git a/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx b/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx new file mode 100755 index 0000000000..50e2c601b6 Binary files /dev/null and b/.cache/clangd/index/tensor_descriptor.hpp.7E73ABD05058119F.idx differ diff --git a/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx b/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx new file mode 100755 index 0000000000..5f4f3ae090 Binary files /dev/null and b/.cache/clangd/index/tensor_descriptor_helper.hpp.B97BC6F42C53AF70.idx differ diff --git a/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx b/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx new file mode 100755 index 0000000000..413add1ed1 Binary files /dev/null and b/.cache/clangd/index/tensor_layout.hpp.83E496410F43D7E9.idx differ diff --git a/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx b/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx new file mode 100755 index 0000000000..c0acbb0cfd Binary files /dev/null and b/.cache/clangd/index/tensor_partition.hpp.2F54ACF04D747753.idx differ diff --git a/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx b/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx new file mode 100755 index 0000000000..ad114cdc23 Binary files /dev/null and b/.cache/clangd/index/tensor_space_filling_curve.hpp.058953EFBD7A2B25.idx differ diff --git a/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx b/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx new file mode 100755 index 0000000000..d113e54a34 Binary files /dev/null and b/.cache/clangd/index/tensor_specialization.hpp.5BC1AF8D88F729F2.idx differ diff --git a/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx b/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx new file mode 100755 index 0000000000..5bdd303497 Binary files /dev/null and b/.cache/clangd/index/tensor_utils.hpp.B92D33FF5A9EED5B.idx differ diff --git a/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx b/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx new file mode 100755 index 0000000000..2420f306fe Binary files /dev/null and b/.cache/clangd/index/test_avg_pool3d_bwd.cpp.6DD8FAFEF9A89C97.idx differ diff --git a/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx b/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx new file mode 100755 index 0000000000..d9f03ea0a1 Binary files /dev/null and b/.cache/clangd/index/test_avg_pool3d_fwd.cpp.5274F49320881D42.idx differ diff --git a/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx b/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx new file mode 100755 index 0000000000..aab8245f58 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm.cpp.A022B37F45A32155.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx new file mode 100755 index 0000000000..ab5b34b003 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp.52CCAAF6582C3CD0.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx new file mode 100755 index 0000000000..1208380bcf Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp.FB71D53BC1DE7E13.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx new file mode 100755 index 0000000000..2b57bd59be Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_bias_softmax_gemm_permute_util.hpp.38E1B00A8B9BAD43.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx b/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx new file mode 100755 index 0000000000..c72336ba24 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_gemm_fp16.cpp.A1B1DD17614F30D3.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx b/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx new file mode 100755 index 0000000000..24635c2285 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_gemm_util.hpp.1FF4EC29A4661CFF.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx new file mode 100755 index 0000000000..f43f257527 Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_fp16.cpp.0592033B8F1CD807.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx new file mode 100755 index 0000000000..b455b4e79c Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_bf16.cpp.EDC4F40BC781A6DC.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx new file mode 100755 index 0000000000..a1a75f4b8b Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_fp16.cpp.0D1473C6C2BC0BFC.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx new file mode 100755 index 0000000000..1813740dea Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_permute_util.hpp.DD433B78FEA57450.idx differ diff --git a/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx b/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx new file mode 100755 index 0000000000..69bc64742c Binary files /dev/null and b/.cache/clangd/index/test_batched_gemm_softmax_gemm_util.hpp.C46CFF8584087968.idx differ diff --git a/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx b/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx new file mode 100755 index 0000000000..0682e3b1e6 Binary files /dev/null and b/.cache/clangd/index/test_bf8.cpp.16F0A495F47E95CC.idx differ diff --git a/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx b/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx new file mode 100755 index 0000000000..353428a23a Binary files /dev/null and b/.cache/clangd/index/test_block_to_ctile_map.cpp.D0968ED6B32C18D9.idx differ diff --git a/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx b/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx new file mode 100755 index 0000000000..256ef6d681 Binary files /dev/null and b/.cache/clangd/index/test_contraction.cpp.6A4280349E6B5DE6.idx differ diff --git a/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx b/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx new file mode 100755 index 0000000000..0bd764ffd8 Binary files /dev/null and b/.cache/clangd/index/test_contraction_interface.cpp.51D57FFCD74AC94A.idx differ diff --git a/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx b/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx new file mode 100755 index 0000000000..67659fc975 Binary files /dev/null and b/.cache/clangd/index/test_conv_tensor_rearrange.cpp.69AF0D8F1794ACAD.idx differ diff --git a/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx b/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx new file mode 100755 index 0000000000..00cc6564d6 Binary files /dev/null and b/.cache/clangd/index/test_conv_tensor_rearrange_interface.cpp.6D91D3DCA5B6D134.idx differ diff --git a/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx b/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx new file mode 100755 index 0000000000..719f089373 Binary files /dev/null and b/.cache/clangd/index/test_elementwise_layernorm_fp16.cpp.6E7042446B0FE0D3.idx differ diff --git a/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx b/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx new file mode 100755 index 0000000000..a927a36715 Binary files /dev/null and b/.cache/clangd/index/test_fp8.cpp.AF44E860BEF4215C.idx differ diff --git a/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx b/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx new file mode 100755 index 0000000000..8007f921e3 Binary files /dev/null and b/.cache/clangd/index/test_gemm_add.hpp.EFC09082310C599D.idx differ diff --git a/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx b/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx new file mode 100755 index 0000000000..349660da6a Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_fastgelu.cpp.9C90F62B27922BE9.idx differ diff --git a/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx b/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx new file mode 100755 index 0000000000..81a4022ce4 Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_relu.cpp.6A11991389EEE204.idx differ diff --git a/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx b/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx new file mode 100755 index 0000000000..5e92621d55 Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_relu_add_layernorm_fp16.cpp.E9706C96D2E415C1.idx differ diff --git a/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx b/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx new file mode 100755 index 0000000000..2b0530567e Binary files /dev/null and b/.cache/clangd/index/test_gemm_add_silu.cpp.0C3DE18B907F5A14.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx b/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx new file mode 100755 index 0000000000..dab26e89a6 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk.cpp.921AD8C68523E94F.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx b/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx new file mode 100755 index 0000000000..6b4b30f532 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk_ut_cases.inc.CD038407CD6C080D.idx differ diff --git a/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx b/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx new file mode 100755 index 0000000000..6168bf6662 Binary files /dev/null and b/.cache/clangd/index/test_gemm_splitk_util.hpp.D5E2B28B24FEB85E.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx b/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx new file mode 100755 index 0000000000..b99e72e84a Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_data.cpp.BA5BCB136A7B5CF1.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx b/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx new file mode 100755 index 0000000000..668ec02b1c Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_data_interface_xdl.cpp.8747321B4790F80F.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx b/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx new file mode 100755 index 0000000000..ec2bf58c50 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_weight.cpp.1E0C24433031CA72.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx b/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx new file mode 100755 index 0000000000..453c299c2f Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_bwd_weight_interface_xdl.cpp.B93D201E166036D3.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx b/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx new file mode 100755 index 0000000000..c8e54471bd Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd.cpp.5B423DD33073B028.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx b/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx new file mode 100755 index 0000000000..b39ff4518c Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd_multi_ab_interface.cpp.D44C4377C972C730.idx differ diff --git a/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx b/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx new file mode 100755 index 0000000000..621e701f41 Binary files /dev/null and b/.cache/clangd/index/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp.D8EE1DD81FCEA622.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx b/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx new file mode 100755 index 0000000000..5394c22e18 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_interface.cpp.BFD580574FDC4028.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx b/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx new file mode 100755 index 0000000000..5b58186ef4 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_splitk.cpp.1985BB1B09859227.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx b/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx new file mode 100755 index 0000000000..8001fb3df3 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_ut_cases.inc.B622DD65303F1D04.idx differ diff --git a/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx b/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx new file mode 100755 index 0000000000..976f0d8755 Binary files /dev/null and b/.cache/clangd/index/test_grouped_gemm_util.hpp.62D7D32AEF140979.idx differ diff --git a/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx b/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx new file mode 100755 index 0000000000..87519167b0 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_bwd_data_fp32.cpp.EF83656CE5FBCD24.idx differ diff --git a/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx b/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx new file mode 100755 index 0000000000..a64ab64d81 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_bwd_gamma_beta_fp32.cpp.266D9654B5E1C67F.idx differ diff --git a/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx b/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx new file mode 100755 index 0000000000..15fac964c6 Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_fwd_fp16.cpp.E01BDD088F0473FE.idx differ diff --git a/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx b/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx new file mode 100755 index 0000000000..63ca52661e Binary files /dev/null and b/.cache/clangd/index/test_groupnorm_fwd_fp32.cpp.06336278490A0D06.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx b/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx new file mode 100755 index 0000000000..d6adb6e5cc Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_bwd_data_fp32.cpp.65C39944BBD4A16D.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx b/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx new file mode 100755 index 0000000000..77f0325b79 Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_bwd_gamma_beta_fp32.cpp.B3B6429EB0BBF1C4.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx b/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx new file mode 100755 index 0000000000..dc2d7531a9 Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_fwd_fp16.cpp.49CB42570B950D66.idx differ diff --git a/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx b/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx new file mode 100755 index 0000000000..bde3c4a0ed Binary files /dev/null and b/.cache/clangd/index/test_layernorm2d_fwd_fp32.cpp.57C9823195A1B889.idx differ diff --git a/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx b/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx new file mode 100755 index 0000000000..027de8bca2 Binary files /dev/null and b/.cache/clangd/index/test_layernorm4d_fwd_fp16.cpp.4CF1AE7F957B873D.idx differ diff --git a/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx b/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx new file mode 100755 index 0000000000..791cfa9699 Binary files /dev/null and b/.cache/clangd/index/test_max_pool3d_bwd.cpp.AF800737CBB45892.idx differ diff --git a/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx b/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx new file mode 100755 index 0000000000..abc44f4fb7 Binary files /dev/null and b/.cache/clangd/index/test_max_pool3d_fwd.cpp.BE8A2BFD19420616.idx differ diff --git a/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx b/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx new file mode 100755 index 0000000000..efaa9296ea Binary files /dev/null and b/.cache/clangd/index/test_permute_scale.cpp.0607B34D8846C932.idx differ diff --git a/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx b/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx new file mode 100755 index 0000000000..1212340ded Binary files /dev/null and b/.cache/clangd/index/test_pool_fwd_common.hpp.CD53AEA46B3FF1EB.idx differ diff --git a/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx b/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx new file mode 100755 index 0000000000..ae6630a48e Binary files /dev/null and b/.cache/clangd/index/test_softmax_interface.cpp.9F0A118BAF3CB91E.idx differ diff --git a/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx b/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx new file mode 100755 index 0000000000..3f4241702e Binary files /dev/null and b/.cache/clangd/index/test_softmax_rank3.cpp.F74F60A3C98E1A4C.idx differ diff --git a/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx b/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx new file mode 100755 index 0000000000..87c8bd2570 Binary files /dev/null and b/.cache/clangd/index/test_softmax_rank4.cpp.05FB4DA05EDB8025.idx differ diff --git a/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx b/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx new file mode 100755 index 0000000000..eae4e8b7f8 Binary files /dev/null and b/.cache/clangd/index/test_softmax_ut_cases.inc.A86C258A9B038466.idx differ diff --git a/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx b/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx new file mode 100755 index 0000000000..d2126fc80a Binary files /dev/null and b/.cache/clangd/index/test_softmax_util.hpp.A85F4CCFF59B9D2C.idx differ diff --git a/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx b/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx new file mode 100755 index 0000000000..26fb935e79 Binary files /dev/null and b/.cache/clangd/index/test_transpose.cpp.EA23A250A71A4476.idx differ diff --git a/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx b/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx new file mode 100755 index 0000000000..b0822c47c8 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_copy.cpp.732CF6ED569867C1.idx differ diff --git a/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx b/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx new file mode 100755 index 0000000000..888a4a2a8a Binary files /dev/null and b/.cache/clangd/index/test_wrapper_gemm.cpp.74B7113BB3F74220.idx differ diff --git a/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx b/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx new file mode 100755 index 0000000000..49228f9cc3 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_layout.cpp.6A8EFC4867A12799.idx differ diff --git a/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx b/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx new file mode 100755 index 0000000000..0e37ec3b0c Binary files /dev/null and b/.cache/clangd/index/test_wrapper_partition.cpp.EDFCA4C2C7218226.idx differ diff --git a/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx b/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx new file mode 100755 index 0000000000..4577e6c730 Binary files /dev/null and b/.cache/clangd/index/test_wrapper_tensor.cpp.A4AB6B2A22D4689E.idx differ diff --git a/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx b/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx new file mode 100755 index 0000000000..f86d2531a9 Binary files /dev/null and b/.cache/clangd/index/thread_group.hpp.568EEF4CF01C2640.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx new file mode 100755 index 0000000000..7a45d33ad5 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_direct_load.hpp.5F79EA1E492991C0.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx new file mode 100755 index 0000000000..f4738a483e Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v4r1.hpp.CC30E09EE6FAA0EA.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx new file mode 100755 index 0000000000..5ef0ff2ddb Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1.hpp.F8F9EEB58869763C.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx new file mode 100755 index 0000000000..97ce4e8363 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r1r2.hpp.34BED592A25E7D4A.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx new file mode 100755 index 0000000000..b6bea96007 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r2.hpp.B8541658FFD968DD.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx new file mode 100755 index 0000000000..19e6e5742f Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v6r3.hpp.F9C1F18AD045D6D7.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx new file mode 100755 index 0000000000..0fc4218a34 Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7.hpp.85ADAEC27150419B.idx differ diff --git a/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx new file mode 100755 index 0000000000..5176faff8e Binary files /dev/null and b/.cache/clangd/index/thread_group_tensor_slice_transfer_v7r2.hpp.E55C206DD2AD1B19.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx new file mode 100755 index 0000000000..501c836879 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer.hpp.CE773F216A04AFAA.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx new file mode 100755 index 0000000000..c76e2c2709 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v3r1.hpp.2D0C9495DC723ACC.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx new file mode 100755 index 0000000000..3c79853144 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v4r1.hpp.42FD0DF31A3BFDC2.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx new file mode 100755 index 0000000000..01fe10d83a Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1.hpp.7AB8A56ED737D765.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx new file mode 100755 index 0000000000..952c3bdd42 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r1r2.hpp.AD9623F82BA99EFC.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx new file mode 100755 index 0000000000..e9dc97dc49 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r2.hpp.D28171A7A44ECC24.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx new file mode 100755 index 0000000000..9300b0337c Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v6r3.hpp.C25C8A66C4CF26F7.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx new file mode 100755 index 0000000000..cc94facf53 Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7.hpp.5C98C34294C833A3.idx differ diff --git a/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx new file mode 100755 index 0000000000..20fa3b763d Binary files /dev/null and b/.cache/clangd/index/threadwise_tensor_slice_transfer_v7r2.hpp.891A886C76B49727.idx differ diff --git a/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx b/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx new file mode 100755 index 0000000000..9c1f309e8a Binary files /dev/null and b/.cache/clangd/index/threadwise_welford.hpp.CCCD2666E9AE4B4E.idx differ diff --git a/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx b/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx new file mode 100755 index 0000000000..08c0cb41be Binary files /dev/null and b/.cache/clangd/index/transform_contraction_to_gemm.hpp.51AB58D6126FEA84.idx differ diff --git a/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx b/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx new file mode 100755 index 0000000000..d41b4c9578 Binary files /dev/null and b/.cache/clangd/index/transform_conv_bwd_data_to_gemm_v1.hpp.341C45E4BB62B32F.idx differ diff --git a/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx b/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx new file mode 100755 index 0000000000..73d34c5cdd Binary files /dev/null and b/.cache/clangd/index/transform_conv_fwd_to_gemm.hpp.14B4756DD59EEE91.idx differ diff --git a/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx b/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx new file mode 100755 index 0000000000..0178c3a083 Binary files /dev/null and b/.cache/clangd/index/transpose_3d.hpp.F3212DEAAECABFBD.idx differ diff --git a/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx b/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx new file mode 100755 index 0000000000..41e7f15039 Binary files /dev/null and b/.cache/clangd/index/transpose_vectors.hpp.7945D9ABB9DC55BD.idx differ diff --git a/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx b/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx new file mode 100755 index 0000000000..cfed3c0a38 Binary files /dev/null and b/.cache/clangd/index/tuple.hpp.3819E4E0F87690B8.idx differ diff --git a/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx b/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx new file mode 100755 index 0000000000..cecd253d15 Binary files /dev/null and b/.cache/clangd/index/tuple_helper.hpp.73EC15D672B6492A.idx differ diff --git a/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx b/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx new file mode 100755 index 0000000000..43c0f57d6e Binary files /dev/null and b/.cache/clangd/index/type.hpp.3093CAE9B6EE9081.idx differ diff --git a/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx b/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx new file mode 100755 index 0000000000..769821a378 Binary files /dev/null and b/.cache/clangd/index/type_convert.hpp.5842DA15997DD61D.idx differ diff --git a/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx b/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx new file mode 100755 index 0000000000..21ac067ba2 Binary files /dev/null and b/.cache/clangd/index/type_convert_const.cpp.46FAD4095BF1793B.idx differ diff --git a/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx b/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx new file mode 100755 index 0000000000..5e11cde6f1 Binary files /dev/null and b/.cache/clangd/index/unary_element_wise_operation.hpp.DD0C457D046F7875.idx differ diff --git a/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx b/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx new file mode 100755 index 0000000000..7c9d29fa4e Binary files /dev/null and b/.cache/clangd/index/welford_helper.hpp.DC75228A25D5F78E.idx differ diff --git a/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx b/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx new file mode 100755 index 0000000000..dc97c586c7 Binary files /dev/null and b/.cache/clangd/index/wmma_gemm.hpp.260BC1EC1DC7109A.idx differ diff --git a/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx b/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx new file mode 100755 index 0000000000..f5902d5599 Binary files /dev/null and b/.cache/clangd/index/workgroup_barrier.hpp.E88FB4AF7CFEB4E5.idx differ diff --git a/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx b/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx new file mode 100755 index 0000000000..39f30da4f4 Binary files /dev/null and b/.cache/clangd/index/workgroup_synchronization.hpp.EE3FBA63C405EAA9.idx differ diff --git a/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx b/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx new file mode 100755 index 0000000000..3a523c7a8a Binary files /dev/null and b/.cache/clangd/index/xdlops_gemm.hpp.C8FD7C44D6227313.idx differ diff --git a/.clang-format b/.clang-format old mode 100644 new mode 100755 diff --git a/.clang-tidy b/.clang-tidy old mode 100644 new mode 100755 diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/config.yml b/.github/ISSUE_TEMPLATE/config.yml old mode 100644 new mode 100755 diff --git a/.github/ISSUE_TEMPLATE/issue_report.yml b/.github/ISSUE_TEMPLATE/issue_report.yml old mode 100644 new mode 100755 diff --git a/.github/dependabot.yml b/.github/dependabot.yml old mode 100644 new mode 100755 diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml old mode 100644 new mode 100755 diff --git a/.readthedocs.yaml b/.readthedocs.yaml old mode 100644 new mode 100755 diff --git a/CHANGELOG.md b/CHANGELOG.md old mode 100644 new mode 100755 diff --git a/CITATION.cff b/CITATION.cff old mode 100644 new mode 100755 diff --git a/CMakeLists.txt b/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md old mode 100644 new mode 100755 diff --git a/Config.cmake.in b/Config.cmake.in old mode 100644 new mode 100755 diff --git a/Dockerfile b/Dockerfile old mode 100644 new mode 100755 diff --git a/Jenkinsfile b/Jenkinsfile old mode 100644 new mode 100755 diff --git a/LICENSE b/LICENSE old mode 100644 new mode 100755 diff --git a/README.md b/README.md old mode 100644 new mode 100755 diff --git a/client_example/01_gemm/CMakeLists.txt b/client_example/01_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/01_gemm/gemm.cpp b/client_example/01_gemm/gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt b/client_example/02_gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_add_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_add_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp b/client_example/02_gemm_add_add_fastgelu/gemm_fastgelu_generic.cpp old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/CMakeLists.txt b/client_example/03_gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp b/client_example/03_gemm_layernorm/gemm_add_add_layernorm_naive.cpp old mode 100644 new mode 100755 diff --git a/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp b/client_example/03_gemm_layernorm/gemm_add_relu_add_layernorm_welford.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/CMakeLists.txt b/client_example/04_contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_bilinear_fp32.cpp b/client_example/04_contraction/contraction_bilinear_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_bilinear_fp64.cpp b/client_example/04_contraction/contraction_bilinear_fp64.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp b/client_example/04_contraction/contraction_g1m2n3k1_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_scale_fp32.cpp b/client_example/04_contraction/contraction_scale_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/04_contraction/contraction_scale_fp64.cpp b/client_example/04_contraction/contraction_scale_fp64.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/CMakeLists.txt b/client_example/05_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_bwd_data.cpp b/client_example/05_layernorm/layernorm2d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp b/client_example/05_layernorm/layernorm2d_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm2d_fwd.cpp b/client_example/05_layernorm/layernorm2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/05_layernorm/layernorm4d_fwd.cpp b/client_example/05_layernorm/layernorm4d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/06_softmax/CMakeLists.txt b/client_example/06_softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/06_softmax/softmax4d.cpp b/client_example/06_softmax/softmax4d.cpp old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/CMakeLists.txt b/client_example/07_grouped_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv1d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp b/client_example/07_grouped_convnd_fwd/grouped_conv2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/CMakeLists.txt b/client_example/08_fused_attention/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/fused_attention.cpp b/client_example/08_fused_attention/fused_attention.cpp old mode 100644 new mode 100755 diff --git a/client_example/08_fused_attention/fused_attention_bias.cpp b/client_example/08_fused_attention/fused_attention_bias.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/CMakeLists.txt b/client_example/09_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_relu_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_bias_tanh_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perchannel_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp b/client_example/09_quantization/conv2d_fwd_perlayer_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/09_quantization/gemm_quantization.cpp b/client_example/09_quantization/gemm_quantization.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt b/client_example/10_grouped_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv2d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp b/client_example/10_grouped_convnd_bwd_data/grouped_conv3d_bwd_data_input_fp16_comp_bf8f8.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt b/client_example/11_grouped_conv_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/common.hpp b/client_example/11_grouped_conv_bwd_weight/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv1d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv2d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp16_comp_bf8_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp b/client_example/11_grouped_conv_bwd_weight/grouped_conv3d_bwd_weight_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/12_elementwise_normalization/CMakeLists.txt b/client_example/12_elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp b/client_example/12_elementwise_normalization/elementwise_layernorm2d.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/CMakeLists.txt b/client_example/13_batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_bwd_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp b/client_example/13_batchnorm/batchnorm_fwd_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp b/client_example/13_batchnorm/batchnorm_infer_nhwc.cpp old mode 100644 new mode 100755 diff --git a/client_example/14_instance_id/CMakeLists.txt b/client_example/14_instance_id/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp b/client_example/14_instance_id/batchnorm_fwd_instance_id.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/CMakeLists.txt b/client_example/15_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/common.hpp b/client_example/15_convnd_bwd_data/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp b/client_example/15_convnd_bwd_data/conv3d_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_gemm_add_multiply/CMakeLists.txt b/client_example/15_gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp b/client_example/15_gemm_add_multiply/gemm_add_multiply.cpp old mode 100644 new mode 100755 diff --git a/client_example/15_reduce/CMakeLists.txt b/client_example/15_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/15_reduce/reduce_nhwc_c.cpp b/client_example/15_reduce/reduce_nhwc_c.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/CMakeLists.txt b/client_example/16_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/common.hpp b/client_example/16_convnd_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp16_comp_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp b/client_example/16_convnd_fwd/conv3d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt b/client_example/17_grouped_gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp b/client_example/17_grouped_gemm_fastgelu/grouped_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/CMakeLists.txt b/client_example/18_groupnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_bwd_data.cpp b/client_example/18_groupnorm/groupnorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_bwd_gamma_beta.cpp b/client_example/18_groupnorm/groupnorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/client_example/18_groupnorm/groupnorm_swish_fwd.cpp b/client_example/18_groupnorm/groupnorm_swish_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/CMakeLists.txt b/client_example/19_pool/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/19_pool/avg_pool3d_bwd.cpp b/client_example/19_pool/avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/avg_pool3d_fwd.cpp b/client_example/19_pool/avg_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/max_pool2d_bwd.cpp b/client_example/19_pool/max_pool2d_bwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/19_pool/max_pool2d_fwd.cpp b/client_example/19_pool/max_pool2d_fwd.cpp old mode 100644 new mode 100755 diff --git a/client_example/20_splitk_gemm/CMakeLists.txt b/client_example/20_splitk_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp b/client_example/20_splitk_gemm/splitK_gemm_fp16_f8.cpp old mode 100644 new mode 100755 diff --git a/client_example/21_grouped_gemm_bias/CMakeLists.txt b/client_example/21_grouped_gemm_bias/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp b/client_example/21_grouped_gemm_bias/grouped_gemm_fixed_nk_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/CMakeLists.txt b/client_example/22_grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_bf16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_fp8.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp b/client_example/22_grouped_gemm/grouped_gemm_fixed_nk_i8.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/CMakeLists.txt b/client_example/22_im2col_col2im/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/column_to_image.cpp b/client_example/22_im2col_col2im/column_to_image.cpp old mode 100644 new mode 100755 diff --git a/client_example/22_im2col_col2im/image_to_column.cpp b/client_example/22_im2col_col2im/image_to_column.cpp old mode 100644 new mode 100755 diff --git a/client_example/23_elementwise_transpose/CMakeLists.txt b/client_example/23_elementwise_transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/23_elementwise_transpose/elementwise_transpose_3d.cpp b/client_example/23_elementwise_transpose/elementwise_transpose_3d.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/CMakeLists.txt b/client_example/24_grouped_conv_activation/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_bilinear/grouped_conv_bwd_data_bilinear_residual_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_bilinear/grouped_conv_bwd_data_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_scale/grouped_conv_bwd_data_scale_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_bwd_data_scale/grouped_conv_bwd_data_scale_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_bilinear/grouped_conv_fwd_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scale/grouped_conv_fwd_scale_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scale/grouped_conv_fwd_scale_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab.inc old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_ab/grouped_conv_fwd_scaleadd_ab_int8.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu.inc old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_bf16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_fp32.cpp old mode 100644 new mode 100755 diff --git a/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp b/client_example/24_grouped_conv_activation/grouped_convnd_fwd_scaleadd_scaleadd_relu/grouped_conv_fwd_scaleadd_scaleadd_relu_int8.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/CMakeLists.txt b/client_example/25_wrapper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/README.md b/client_example/25_wrapper/README.md old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/tensor_transform_using_wrapper.cpp b/client_example/25_wrapper/tensor_transform_using_wrapper.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_basic_gemm.cpp b/client_example/25_wrapper/wrapper_basic_gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_img2col.cpp b/client_example/25_wrapper/wrapper_img2col.cpp old mode 100644 new mode 100755 diff --git a/client_example/25_wrapper/wrapper_optimized_gemm.cpp b/client_example/25_wrapper/wrapper_optimized_gemm.cpp old mode 100644 new mode 100755 diff --git a/client_example/CMakeLists.txt b/client_example/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/client_example/README.md b/client_example/README.md old mode 100644 new mode 100755 diff --git a/cmake/Analyzers.cmake b/cmake/Analyzers.cmake old mode 100644 new mode 100755 diff --git a/cmake/ClangTidy.cmake b/cmake/ClangTidy.cmake old mode 100644 new mode 100755 diff --git a/cmake/CppCheck.cmake b/cmake/CppCheck.cmake old mode 100644 new mode 100755 diff --git a/cmake/DoxygenDoc.cmake b/cmake/DoxygenDoc.cmake old mode 100644 new mode 100755 diff --git a/cmake/Embed.cmake b/cmake/Embed.cmake old mode 100644 new mode 100755 diff --git a/cmake/EnableCompilerWarnings.cmake b/cmake/EnableCompilerWarnings.cmake old mode 100644 new mode 100755 diff --git a/cmake/TargetFlags.cmake b/cmake/TargetFlags.cmake old mode 100644 new mode 100755 diff --git a/cmake/getopt.cmake b/cmake/getopt.cmake old mode 100644 new mode 100755 diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake old mode 100644 new mode 100755 diff --git a/codegen/CMakeLists.txt b/codegen/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/driver/main.cpp b/codegen/driver/main.cpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d.hpp b/codegen/include/ck/host/device_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d/operation.hpp b/codegen/include/ck/host/device_gemm_multiple_d/operation.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/device_gemm_multiple_d/problem.hpp b/codegen/include/ck/host/device_gemm_multiple_d/problem.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/headers.hpp b/codegen/include/ck/host/headers.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/operation/gemm.hpp b/codegen/include/ck/host/operation/gemm.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/stringutils.hpp b/codegen/include/ck/host/stringutils.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/types.hpp b/codegen/include/ck/host/types.hpp old mode 100644 new mode 100755 diff --git a/codegen/include/ck/host/utils.hpp b/codegen/include/ck/host/utils.hpp old mode 100644 new mode 100755 diff --git a/codegen/src/device_gemm_multiple_d.cpp b/codegen/src/device_gemm_multiple_d.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp b/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/headers.cpp b/codegen/src/headers.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/types.cpp b/codegen/src/types.cpp old mode 100644 new mode 100755 diff --git a/codegen/src/utils.cpp b/codegen/src/utils.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/CMakeLists.txt b/codegen/test/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/test/gemm_multiple_d.cpp b/codegen/test/gemm_multiple_d.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/include/test.hpp b/codegen/test/include/test.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/CMakeLists.txt b/codegen/test/rtc/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/compile_kernel.hpp b/codegen/test/rtc/include/rtc/compile_kernel.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/hip.hpp b/codegen/test/rtc/include/rtc/hip.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/kernel.hpp b/codegen/test/rtc/include/rtc/kernel.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/manage_ptr.hpp b/codegen/test/rtc/include/rtc/manage_ptr.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/include/rtc/tmp_dir.hpp b/codegen/test/rtc/include/rtc/tmp_dir.hpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/compile_kernel.cpp b/codegen/test/rtc/src/compile_kernel.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/hip.cpp b/codegen/test/rtc/src/hip.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/kernel.cpp b/codegen/test/rtc/src/kernel.cpp old mode 100644 new mode 100755 diff --git a/codegen/test/rtc/src/tmp_dir.cpp b/codegen/test/rtc/src/tmp_dir.cpp old mode 100644 new mode 100755 diff --git a/dev-requirements.txt b/dev-requirements.txt old mode 100644 new mode 100755 diff --git a/docs/Contributors_Guide.rst b/docs/Contributors_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/conceptual/what-is-ck.rst b/docs/conceptual/what-is-ck.rst old mode 100644 new mode 100755 diff --git a/docs/conf.py b/docs/conf.py old mode 100644 new mode 100755 diff --git a/docs/data/ck_component.png b/docs/data/ck_component.png old mode 100644 new mode 100755 diff --git a/docs/data/ck_layer.png b/docs/data/ck_layer.png old mode 100644 new mode 100755 diff --git a/docs/doxygen/Doxyfile b/docs/doxygen/Doxyfile old mode 100644 new mode 100755 diff --git a/docs/index.rst b/docs/index.rst old mode 100644 new mode 100755 diff --git a/docs/install/dockerhub.rst b/docs/install/dockerhub.rst old mode 100644 new mode 100755 diff --git a/docs/license.rst b/docs/license.rst old mode 100644 new mode 100755 diff --git a/docs/reference/API_Reference_Guide.rst b/docs/reference/API_Reference_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/reference/Supported_Primitives_Guide.rst b/docs/reference/Supported_Primitives_Guide.rst old mode 100644 new mode 100755 diff --git a/docs/reference/wrapper.rst b/docs/reference/wrapper.rst old mode 100644 new mode 100755 diff --git a/docs/refs.bib b/docs/refs.bib old mode 100644 new mode 100755 diff --git a/docs/sphinx/_toc.yml.in b/docs/sphinx/_toc.yml.in old mode 100644 new mode 100755 diff --git a/docs/sphinx/requirements.in b/docs/sphinx/requirements.in old mode 100644 new mode 100755 diff --git a/docs/sphinx/requirements.txt b/docs/sphinx/requirements.txt old mode 100644 new mode 100755 diff --git a/docs/tutorial/tutorial_hello_world.rst b/docs/tutorial/tutorial_hello_world.rst old mode 100644 new mode 100755 diff --git a/example/01_gemm/CMakeLists.txt b/example/01_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/01_gemm/README.md b/example/01_gemm/README.md old mode 100644 new mode 100755 diff --git a/example/01_gemm/common.hpp b/example/01_gemm/common.hpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_fp16.cpp b/example/01_gemm/gemm_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_fp32.cpp b/example/01_gemm/gemm_dl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_int4.cpp b/example/01_gemm/gemm_dl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dl_int8.cpp b/example/01_gemm/gemm_dl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_dpp_fp16.cpp b/example/01_gemm/gemm_dpp_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_wmma_fp16.cpp b/example/01_gemm/gemm_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_bf16.cpp b/example/01_gemm/gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_bf16_rtn.cpp b/example/01_gemm/gemm_xdl_bf16_rtn.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16.cpp b/example/01_gemm/gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16_fp8.cpp b/example/01_gemm/gemm_xdl_fp16_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp16_v2.cpp b/example/01_gemm/gemm_xdl_fp16_v2.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp64.cpp b/example/01_gemm/gemm_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp8.cpp b/example/01_gemm/gemm_xdl_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_fp8_bf8.cpp b/example/01_gemm/gemm_xdl_fp8_bf8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_int4.cpp b/example/01_gemm/gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_int8.cpp b/example/01_gemm/gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp b/example/01_gemm/gemm_xdl_lds_direct_load_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp b/example/01_gemm/gemm_xdl_lds_direct_load_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp b/example/01_gemm/gemm_xdl_skip_b_lds_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_streamk.cpp b/example/01_gemm/gemm_xdl_streamk.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/gemm_xdl_wavelet_fp16.cpp b/example/01_gemm/gemm_xdl_wavelet_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/01_gemm/run_gemm_example.inc b/example/01_gemm/run_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/CMakeLists.txt b/example/02_gemm_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/README.md b/example/02_gemm_bilinear/README.md old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp b/example/02_gemm_bilinear/gemm_bilinear_wmma_int8.cpp old mode 100644 new mode 100755 diff --git a/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp b/example/02_gemm_bilinear/gemm_bilinear_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/CMakeLists.txt b/example/03_gemm_bias_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/README.md b/example/03_gemm_bias_relu/README.md old mode 100644 new mode 100755 diff --git a/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp b/example/03_gemm_bias_relu/gemm_bias_relu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/CMakeLists.txt b/example/04_gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/README.md b/example/04_gemm_add_add_fastgelu/README.md old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/common.hpp b/example/04_gemm_add_add_fastgelu/common.hpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp b/example/04_gemm_add_add_fastgelu/gemm_add_add_fastgelu_xdl_lds_direct_load_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc b/example/04_gemm_add_add_fastgelu/run_gemm_add_add_fastgelu_example.inc old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/CMakeLists.txt b/example/09_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/README.md b/example/09_convnd_fwd/README.md old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_common.hpp b/example/09_convnd_fwd/convnd_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_common.hpp b/example/09_convnd_fwd/convnd_fwd_dl_common.hpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_dl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_dl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp b/example/09_convnd_fwd/convnd_fwd_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc b/example/09_convnd_fwd/run_convnd_fwd_dl_example.inc old mode 100644 new mode 100755 diff --git a/example/09_convnd_fwd/run_convnd_fwd_example.inc b/example/09_convnd_fwd/run_convnd_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt b/example/10_convnd_fwd_multiple_d_multiple_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/common.hpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp b/example/10_convnd_fwd_multiple_d_multiple_reduce/convnd_fwd_max_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc b/example/10_convnd_fwd_multiple_d_multiple_reduce/run_convnd_fwd_max_example.inc old mode 100644 new mode 100755 diff --git a/example/12_reduce/CMakeLists.txt b/example/12_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/12_reduce/README.md b/example/12_reduce/README.md old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise.cpp b/example/12_reduce/reduce_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise_impl.hpp b/example/12_reduce/reduce_blockwise_impl.hpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_blockwise_two_call.cpp b/example/12_reduce/reduce_blockwise_two_call.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_example_common.hpp b/example/12_reduce/reduce_example_common.hpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_multiblock_atomic_add.cpp b/example/12_reduce/reduce_multiblock_atomic_add.cpp old mode 100644 new mode 100755 diff --git a/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp b/example/12_reduce/reduce_multiblock_atomic_add_impl.hpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/CMakeLists.txt b/example/13_pool2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/README.md b/example/13_pool2d_fwd/README.md old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_common.hpp b/example/13_pool2d_fwd/pool2d_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp b/example/13_pool2d_fwd/pool2d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/CMakeLists.txt b/example/14_gemm_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_dl_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_bias_relu_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp b/example/14_gemm_quantization/gemm_xdl_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/CMakeLists.txt b/example/15_grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/README.md b/example/15_grouped_gemm/README.md old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_multiple_d_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fixed_nk_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp b/example/15_grouped_gemm/grouped_gemm_xdl_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/15_grouped_gemm/run_grouped_gemm_example.inc b/example/15_grouped_gemm/run_grouped_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt b/example/16_gemm_multi_d_multi_reduces/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_add_mean_meansquare_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_add_addsquare_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_max_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp b/example/16_gemm_multi_d_multi_reduces/gemm_mean_meansquare_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp b/example/16_gemm_multi_d_multi_reduces/gemm_reduce_xdl_common.hpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/CMakeLists.txt b/example/17_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/README.md b/example/17_convnd_bwd_data/README.md old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp b/example/17_convnd_bwd_data/convnd_bwd_data_common.hpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp b/example/17_convnd_bwd_data/convnd_bwd_data_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/18_batched_gemm_reduce/CMakeLists.txt b/example/18_batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp b/example/18_batched_gemm_reduce/batched_gemm_reduce_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/CMakeLists.txt b/example/19_binary_elementwise/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp b/example/19_binary_elementwise/broadcast_add_2d_amn_bn.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp b/example/19_binary_elementwise/broadcast_add_3d_am_bmnk.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/elementwise_add_1d.cpp b/example/19_binary_elementwise/elementwise_add_1d.cpp old mode 100644 new mode 100755 diff --git a/example/19_binary_elementwise/elementwise_add_4d.cpp b/example/19_binary_elementwise/elementwise_add_4d.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/CMakeLists.txt b/example/20_grouped_conv_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/common.hpp b/example/20_grouped_conv_bwd_weight/common.hpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp b/example/20_grouped_conv_bwd_weight/grouped_conv_bwd_weight_xdl_fp16_comp_bf8_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc b/example/20_grouped_conv_bwd_weight/run_grouped_conv_bwd_weight_example.inc old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/CMakeLists.txt b/example/21_gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_naive_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp b/example/21_gemm_layernorm/gemm_bias_relu_add_layernorm_xdl_welford_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp b/example/21_gemm_layernorm/gemm_layernorm_xdl_naive_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp b/example/21_gemm_layernorm/gemm_xdl_layernorm_naive_single_kernel_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/CMakeLists.txt b/example/22_cgemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_bf16.cpp b/example/22_cgemm/cgemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_common.hpp b/example/22_cgemm/cgemm_xdl_common.hpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_fp16.cpp b/example/22_cgemm/cgemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_fp32.cpp b/example/22_cgemm/cgemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_int4.cpp b/example/22_cgemm/cgemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/22_cgemm/cgemm_xdl_int8.cpp b/example/22_cgemm/cgemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/23_softmax/CMakeLists.txt b/example/23_softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/23_softmax/README.md b/example/23_softmax/README.md old mode 100644 new mode 100755 diff --git a/example/23_softmax/softmax_blockwise.cpp b/example/23_softmax/softmax_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/CMakeLists.txt b/example/24_batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp b/example/24_batched_gemm/batched_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp b/example/24_batched_gemm/batched_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_int4.cpp b/example/24_batched_gemm/batched_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/batched_gemm_xdl_int8.cpp b/example/24_batched_gemm/batched_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/24_batched_gemm/run_batched_gemm_example.inc b/example/24_batched_gemm/run_batched_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/CMakeLists.txt b/example/25_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m2n3k1_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp b/example/25_gemm_bias_e_permute/gemm_bias_e_permute_g1m3n2k1_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/CMakeLists.txt b/example/26_contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/26_contraction/README.md b/example/26_contraction/README.md old mode 100644 new mode 100755 diff --git a/example/26_contraction/common_instances.hpp b/example/26_contraction/common_instances.hpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_bf16_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_bf16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp16_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32_compute_bf16.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32_compute_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp32_compute_fp16.cpp b/example/26_contraction/contraction_bilinear_xdl_fp32_compute_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp64.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_bilinear_xdl_fp64_compute_fp32.cpp b/example/26_contraction/contraction_bilinear_xdl_fp64_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_bf16_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_bf16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp16_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp16_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32_compute_bf16.cpp b/example/26_contraction/contraction_scale_xdl_fp32_compute_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp32_compute_fp16.cpp b/example/26_contraction/contraction_scale_xdl_fp32_compute_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp64.cpp b/example/26_contraction/contraction_scale_xdl_fp64.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/contraction_scale_xdl_fp64_compute_fp32.cpp b/example/26_contraction/contraction_scale_xdl_fp64_compute_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/26_contraction/run_contraction_bilinear_example.inc b/example/26_contraction/run_contraction_bilinear_example.inc old mode 100644 new mode 100755 diff --git a/example/26_contraction/run_contraction_scale_example.inc b/example/26_contraction/run_contraction_scale_example.inc old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/CMakeLists.txt b/example/27_layernorm2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/common.hpp b/example/27_layernorm2d_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp b/example/27_layernorm2d_fwd/layernorm2d_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/27_layernorm2d_fwd/run_layernorm_example.inc b/example/27_layernorm2d_fwd/run_layernorm_example.inc old mode 100644 new mode 100755 diff --git a/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt b/example/28_grouped_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp b/example/28_grouped_gemm_bias_e_permute/grouped_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/CMakeLists.txt b/example/29_batched_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp b/example/29_batched_gemm_bias_e_permute/batched_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt b/example/30_grouped_conv_fwd_multiple_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/README.md b/example/30_grouped_conv_fwd_multiple_d/README.md old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/common.hpp b/example/30_grouped_conv_fwd_multiple_d/common.hpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp b/example/30_grouped_conv_fwd_multiple_d/common_wmma.hpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_wmma_int8.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_bias_relu_add_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp b/example/30_grouped_conv_fwd_multiple_d/grouped_conv_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_example.inc old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_bias_relu_add_wmma_example.inc old mode 100644 new mode 100755 diff --git a/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc b/example/30_grouped_conv_fwd_multiple_d/run_grouped_conv_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/CMakeLists.txt b/example/31_batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp b/example/31_batched_gemm_gemm/batched_gemm_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc b/example/31_batched_gemm_gemm/run_batched_gemm_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt b/example/32_batched_gemm_scale_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/batched_gemm_scale_softmax_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/cross_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_lower_triangle_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_gemm_scale_softmax_gemm_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/grouped_query_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/multi_query_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_batched_gemm_scale_softmax_gemm_permute_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_cross_attention_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_cross_attention_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_gemm_scale_softmax_gemm_permute.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_grouped_query_attention_forward_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_grouped_query_attention_forward_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_multi_query_attention_forward_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_multi_query_attention_forward_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/run_self_attention_wmma.inc b/example/32_batched_gemm_scale_softmax_gemm/run_self_attention_wmma.inc old mode 100644 new mode 100755 diff --git a/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp b/example/32_batched_gemm_scale_softmax_gemm/self_attention_forward_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/CMakeLists.txt b/example/33_multiple_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/README.md b/example/33_multiple_reduce/README.md old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_common.hpp b/example/33_multiple_reduce/dual_reduce_common.hpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_multiblock.cpp b/example/33_multiple_reduce/dual_reduce_multiblock.cpp old mode 100644 new mode 100755 diff --git a/example/33_multiple_reduce/dual_reduce_threadwise.cpp b/example/33_multiple_reduce/dual_reduce_threadwise.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/CMakeLists.txt b/example/34_batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/README.md b/example/34_batchnorm/README.md old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_backward_nhwc.cpp b/example/34_batchnorm/batchnorm_backward_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_common.hpp b/example/34_batchnorm/batchnorm_common.hpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_inferring_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp b/example/34_batchnorm/batchnorm_forward_training_nhwc_obsolete.cpp old mode 100644 new mode 100755 diff --git a/example/34_batchnorm/batchnorm_infer_impl.hpp b/example/34_batchnorm/batchnorm_infer_impl.hpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/CMakeLists.txt b/example/35_splitK_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/run_splitK_gemm_example.inc b/example/35_splitK_gemm/run_splitK_gemm_example.inc old mode 100644 new mode 100755 index e3690984ab..944377a4c4 --- a/example/35_splitK_gemm/run_splitK_gemm_example.inc +++ b/example/35_splitK_gemm/run_splitK_gemm_example.inc @@ -60,10 +60,14 @@ bool run_splitK_gemm(const ProblemSize& problem_size, const ExecutionConfig& con case 1: a_m_k.GenerateTensorValue(GeneratorTensor_2{-5, 5}); b_k_n.GenerateTensorValue(GeneratorTensor_2{-5, 5}); + //a_m_k.GenerateTensorValue_control_entropy(GeneratorTensor_2); + //b_k_n.GenerateTensorValue_control_entropy(GeneratorTensor_2{-5, 5}); break; case 2: a_m_k.GenerateTensorValue(GeneratorTensor_3{0.0, 1.0}); b_k_n.GenerateTensorValue(GeneratorTensor_3{-0.5, 0.5}); + //a_m_k.GenerateTensorValue(GeneratorTensor_3_control_entropy{}); + //b_k_n.GenerateTensorValue(GeneratorTensor_3_control_entropy{}); break; default: a_m_k.GenerateTensorValue(GeneratorTensor_Sequential<0>{}); diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp16_fp8.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp b/example/35_splitK_gemm/splitK_gemm_xdl_lds_direct_load_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/36_sparse_embedding/CMakeLists.txt b/example/36_sparse_embedding/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp b/example/36_sparse_embedding/sparse_embedding3_forward_layernorm.cpp old mode 100644 new mode 100755 diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt b/example/37_batched_gemm_add_add_relu_gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp b/example/37_batched_gemm_add_add_relu_gemm_add/batched_gemm_add_add_relu_gemm_add_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt b/example/38_grouped_conv_bwd_data_multiple_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/common.hpp b/example/38_grouped_conv_bwd_data_multiple_d/common.hpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_bias_relu_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_wmma_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp b/example/38_grouped_conv_bwd_data_multiple_d/grouped_conv_bwd_data_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_bias_relu_example.inc old mode 100644 new mode 100755 diff --git a/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc b/example/38_grouped_conv_bwd_data_multiple_d/run_grouped_conv_bwd_data_example.inc old mode 100644 new mode 100755 diff --git a/example/39_permute/CMakeLists.txt b/example/39_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/39_permute/common.hpp b/example/39_permute/common.hpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_1xHxW_fp16.cpp b/example/39_permute/permute_1xHxW_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_HxWx4_fp16.cpp b/example/39_permute/permute_HxWx4_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/permute_NxHxW_fp16.cpp b/example/39_permute/permute_NxHxW_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/39_permute/run_permute_bundle_example.inc b/example/39_permute/run_permute_bundle_example.inc old mode 100644 new mode 100755 diff --git a/example/39_permute/run_permute_element_example.inc b/example/39_permute/run_permute_element_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/CMakeLists.txt b/example/40_conv2d_fwd_quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/common.hpp b/example/40_conv2d_fwd_quantization/common.hpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_relu_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_bias_tanh_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_dl_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_bias_relu_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perchannel_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp b/example/40_conv2d_fwd_quantization/conv2d_fwd_xdl_perlayer_quantization_int8.cpp old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perchannel_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_bias_perlayer_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perchannel_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc b/example/40_conv2d_fwd_quantization/run_conv2d_fwd_perlayer_quantization_example.inc old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/CMakeLists.txt b/example/41_grouped_conv_conv_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int4.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp b/example/41_grouped_conv_conv_fwd/grouped_conv_conv_fwd_xdl_int8.cpp old mode 100644 new mode 100755 diff --git a/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc b/example/41_grouped_conv_conv_fwd/run_grouped_conv_conv_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/CMakeLists.txt b/example/42_groupnorm_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/common.hpp b/example/42_groupnorm_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_sigmoid_mul_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp b/example/42_groupnorm_fwd/groupnorm_fwd_swish_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc b/example/42_groupnorm_fwd/run_groupnorm_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt b/example/43_splitk_gemm_bias_e_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp b/example/43_splitk_gemm_bias_e_permute/splitk_gemm_bias_e_permute_xdl_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/CMakeLists.txt b/example/44_elementwise_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute.cpp b/example/44_elementwise_permute/elementwise_permute.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_3d.cpp b/example/44_elementwise_permute/elementwise_permute_3d.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_2d.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_col.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp16_row.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_col.cpp old mode 100644 new mode 100755 diff --git a/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp b/example/44_elementwise_permute/elementwise_permute_4D_fp32_row.cpp old mode 100644 new mode 100755 diff --git a/example/45_elementwise_normalization/CMakeLists.txt b/example/45_elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp b/example/45_elementwise_normalization/elementwise_layernorm_blockwise.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/CMakeLists.txt b/example/46_gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/README.md b/example/46_gemm_add_multiply/README.md old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/common.hpp b/example/46_gemm_add_multiply/common.hpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_dl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp b/example/46_gemm_add_multiply/gemm_add_multiply_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc b/example/46_gemm_add_multiply/run_gemm_add_multiply_example.inc old mode 100644 new mode 100755 diff --git a/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt b/example/47_gemm_bias_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp b/example/47_gemm_bias_softmax_gemm_permute/gemm_bias_softmax_gemm_permute.cpp old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/CMakeLists.txt b/example/48_pool3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/pool3d_fwd_common.hpp b/example/48_pool3d_fwd/pool3d_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp b/example/48_pool3d_fwd/pool3d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/CMakeLists.txt b/example/49_maxpool2d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp b/example/49_maxpool2d_bwd/maxpool2d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/50_put_element/CMakeLists.txt b/example/50_put_element/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/50_put_element/put_element_fp16.cpp b/example/50_put_element/put_element_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/CMakeLists.txt b/example/51_avgpool3d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_common.hpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp b/example/51_avgpool3d_bwd/avgpool3d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/CMakeLists.txt b/example/52_im2col_col2im/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/column_to_image_f32.cpp b/example/52_im2col_col2im/column_to_image_f32.cpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/common.hpp b/example/52_im2col_col2im/common.hpp old mode 100644 new mode 100755 diff --git a/example/52_im2col_col2im/image_to_column_f32.cpp b/example/52_im2col_col2im/image_to_column_f32.cpp old mode 100644 new mode 100755 diff --git a/example/53_layernorm2d_bwd/CMakeLists.txt b/example/53_layernorm2d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp b/example/53_layernorm2d_bwd/layernorm2d_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/54_groupnorm_bwd/CMakeLists.txt b/example/54_groupnorm_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp b/example/54_groupnorm_bwd/groupnorm_bwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/60_gemm_multi_ABD/CMakeLists.txt b/example/60_gemm_multi_ABD/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp b/example/60_gemm_multi_ABD/gemm_multi_ABD_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/61_contraction_multi_ABD/CMakeLists.txt b/example/61_contraction_multi_ABD/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp b/example/61_contraction_multi_ABD/contraction_multi_ABD_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/CMakeLists.txt b/example/62_convnd_activ/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/CMakeLists.txt b/example/62_convnd_activ/binary/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_bwd_data_xdl_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp b/example/62_convnd_activ/binary/convnd_fwd_xdl_bilinear_residual_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_bcasted_bias_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp b/example/62_convnd_activ/convnd_fwd_xdl_scaleadd_scaleadd_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/CMakeLists.txt b/example/62_convnd_activ/multi_AB/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_bf16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_fp32.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp b/example/62_convnd_activ/multi_AB/conv_fwd_xdl_scaleadd_ab_int8.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp b/example/62_convnd_activ/multi_AB/convnd_fwd_activ_multi_ab_common.hpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/run_convnd_activ_example.inc b/example/62_convnd_activ/run_convnd_activ_example.inc old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/CMakeLists.txt b/example/62_convnd_activ/unary/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp b/example/62_convnd_activ/unary/convnd_fwd_activ_unary_common.hpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_abs_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_clippedrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_elu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_leakyrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_pow_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_relu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_sigmoid_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_softrelu_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp b/example/62_convnd_activ/unary/convnd_fwd_xdl_tanh_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/CMakeLists.txt b/example/63_layernorm4d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/common.hpp b/example/63_layernorm4d_fwd/common.hpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp b/example/63_layernorm4d_fwd/layernorm4d_fwd_splitk_fp16.cpp old mode 100644 new mode 100755 diff --git a/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc b/example/63_layernorm4d_fwd/run_layernorm4d_fwd_example.inc old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/CMakeLists.txt b/example/64_fpAintB_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/common.hpp b/example/64_fpAintB_gemm/common.hpp old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/fp16int8_gemm_wmma.cpp b/example/64_fpAintB_gemm/fp16int8_gemm_wmma.cpp old mode 100644 new mode 100755 diff --git a/example/64_fpAintB_gemm/run_gemm_example.inc b/example/64_fpAintB_gemm/run_gemm_example.inc old mode 100644 new mode 100755 diff --git a/example/CMakeLists.txt b/example/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp old mode 100644 new mode 100755 diff --git a/include/ck/config.h.in b/include/ck/config.h.in old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/device_prop.hpp b/include/ck/host_utility/device_prop.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/hip_check_error.hpp b/include/ck/host_utility/hip_check_error.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/io.hpp b/include/ck/host_utility/io.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/kernel_launch.hpp b/include/ck/host_utility/kernel_launch.hpp old mode 100644 new mode 100755 diff --git a/include/ck/host_utility/stream_utility.hpp b/include/ck/host_utility/stream_utility.hpp old mode 100644 new mode 100755 diff --git a/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp b/include/ck/problem_transform/transform_forward_convolution3d_into_gemm_v4r4r4_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/stream_config.hpp b/include/ck/stream_config.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor/static_tensor.hpp b/include/ck/tensor/static_tensor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/cluster_descriptor.hpp b/include/ck/tensor_description/cluster_descriptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/multi_index_transform.hpp b/include/ck/tensor_description/multi_index_transform.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/multi_index_transform_helper.hpp b/include/ck/tensor_description/multi_index_transform_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_adaptor.hpp b/include/ck/tensor_description/tensor_adaptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_descriptor.hpp b/include/ck/tensor_description/tensor_descriptor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_descriptor_helper.hpp b/include/ck/tensor_description/tensor_descriptor_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_description/tensor_space_filling_curve.hpp b/include/ck/tensor_description/tensor_space_filling_curve.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dl_v2r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v2r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dlops_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_pipeline_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/block/blockwise_gemm_xdlops_skip_b_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp b/include/ck/tensor_operation/gpu/block/blockwise_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/block/blockwise_tensor_slice_transfer_v5r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp b/include/ck/tensor_operation/gpu/block/blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp b/include/ck/tensor_operation/gpu/block/reduction_functions_blockwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1_dequant.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v4r1_dequant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r1r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v6r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/block/thread_group_tensor_slice_transfer_v7r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp b/include/ck/tensor_operation/gpu/device/conv_tensor_rearrange_op.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_data_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_backward_weight_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp b/include/ck/tensor_operation/gpu/device/convolution_forward_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_avgpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_base.hpp b/include/ck/tensor_operation/gpu/device/device_base.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_e_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multi_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_multiple_d_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_batched_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp b/include/ck/tensor_operation/gpu/device/device_batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_cgemm.hpp b/include/ck/tensor_operation/gpu/device/device_cgemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_conv_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp b/include/ck/tensor_operation/gpu/device/device_conv_fwd_bias_activation_add.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/device/device_conv_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_normalization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_elementwise_scale.hpp b/include/ck/tensor_operation/gpu/device/device_elementwise_scale.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_bias_e_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_dequantB.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_dequantB.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_multiple_d_multiple_r.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp b/include/ck/tensor_operation/gpu/device/device_gemm_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_data_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_abd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_conv_fwd_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp b/include/ck/tensor_operation/gpu/device/device_grouped_gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp b/include/ck/tensor_operation/gpu/device/device_max_pool_bwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_multiple_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_bwd_data.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_bwd_gamma_beta.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_normalization_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_normalization_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_permute.hpp b/include/ck/tensor_operation/gpu/device/device_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp b/include/ck/tensor_operation/gpu/device/device_pool_fwd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_put_element.hpp b/include/ck/tensor_operation/gpu/device/device_put_element.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_reduce.hpp b/include/ck/tensor_operation/gpu/device/device_reduce.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_softmax.hpp b/include/ck/tensor_operation/gpu/device/device_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp b/include/ck/tensor_operation/gpu/device/device_splitk_contraction_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp b/include/ck/tensor_operation/gpu/device/gemm_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_avgpool3d_bwd_ndhwc_ndhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_e_permute_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multi_d_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_backward_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batchnorm_forward_impl_obsolete.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_cgemm_4gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_column_to_image_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_contraction_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_contraction_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_backward_weight_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_naive_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_conv3d_fwd_xdl_ndhwc_kzyxc_ndhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_convnd_bwd_data_nwc_kxc_nwk_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_2d_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_3d_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_normalization_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_elementwise_scale_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_fpAintB_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_fpAintB_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_bias_add_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_layernorm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_reduce_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_layernorm_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_skip_b_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_splitk_c_shuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_gemm_xdl_waveletmodel_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_data_multiple_d_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_bwd_weight_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_multiple_d_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_dl_nhwc_kyxc_nhwk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_fwd_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_conv_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_multiple_d_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_softmax_gemm_permute_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_gemm_xdl_splitk_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_grouped_query_attention_forward_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_grouped_query_attention_forward_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_image_to_column_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_max_pool_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multi_query_attention_forward_wmma.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multi_query_attention_forward_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_multiple_reduce_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_data_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_splitk_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_normalization_fwd_splitk_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool2d_fwd_nhwc_nhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp b/include/ck/tensor_operation/gpu/device/impl/device_pool3d_fwd_ndhwc_ndhwc.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_common.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp b/include/ck/tensor_operation/gpu/device/impl/device_reduce_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp b/include/ck/tensor_operation/gpu/device/impl/device_softmax_impl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/device/impl/device_sparse_embeddings_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_splitk_contraction_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/masking_specialization.hpp b/include/ck/tensor_operation/gpu/device/masking_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/matrix_padder.hpp b/include/ck/tensor_operation/gpu/device/matrix_padder.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp b/include/ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/tensor_layout.hpp b/include/ck/tensor_operation/gpu/device/tensor_layout.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp b/include/ck/tensor_operation/gpu/device/tensor_specialization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/device/welford_helper.hpp b/include/ck/tensor_operation/gpu/device/welford_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/quantization_operation.hpp b/include/ck/tensor_operation/gpu/element/quantization_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_reduce_second_half_batchnorm_backward_final.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_first_half.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_batchnorm_forward_final_obsolete.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp b/include/ck/tensor_operation/gpu/grid/batchnorm_multiblock/gridwise_multiblock_welford_second_half_multiblock_reduce_first_half.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp b/include/ck/tensor_operation/gpu/grid/block_to_ctile_map.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_gemm_multiple_d_welford_first_half_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp b/include/ck/tensor_operation/gpu/grid/gemm_layernorm/gridwise_welford_second_half_layernorm2d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_multiple_reduction_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_multiblock.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_2d_reduction_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_gemm_multiple_d_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_multiple_d_softmax_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batched_gemm_softmax_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_backward_blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_batchnorm_forward_blockwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_1d_scale.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_2d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_3d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_3d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_fpAintB_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_bias_add_reduce_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_multiple_d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dl_v1r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_abd_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_multiple_r_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_wmma_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_cshuffle_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_multiple_d_xdl_splitk_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_selector.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v4_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_pipeline_v4_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_reduce_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_split_k_multiple_d_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_waveletmodel.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_cshuffle_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_layernorm_cshuffle_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_waveletmodel_cshuffle.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_skip_b_lds_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_splitk_lds_direct_load.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_streamk.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp old mode 100644 new mode 100755 index b52f5c51b1..fb89b45f1e --- a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v2r4r2.hpp @@ -27,13 +27,13 @@ template __global__ void #if CK_USE_LAUNCH_BOUNDS - __launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) +__launch_bounds__(CK_MAX_THREAD_PER_BLOCK, CK_MIN_BLOCK_PER_CU) #endif - kernel_gemm_xdlops_v2r4r2_simplified(typename GridwiseGemm::Argument karg, - const Block2CTileMap& b2c_map, - const AElementwiseOperation a_element_op, - const BElementwiseOperation b_element_op, - const CElementwiseOperation c_element_op) + kernel_gemm_xdlops_v2r4r2_simplified(typename GridwiseGemm::Argument karg, + const Block2CTileMap& b2c_map, + const AElementwiseOperation a_element_op, + const BElementwiseOperation b_element_op, + const CElementwiseOperation c_element_op) { #if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx908__) || defined(__gfx90a__) || \ defined(__gfx94__)) @@ -669,433 +669,450 @@ struct GridwiseGemm_bk0mk1_bk0nk1_mn_xdlops_v2r4r2 const BElementwiseOperation b_element_op = BElementwiseOperation{}, const CElementwiseOperation c_element_op = CElementwiseOperation{}) { - const FloatA* p_a_grid = karg.p_a_grid; - const FloatB* p_b_grid = karg.p_b_grid; - FloatC* p_c_grid = karg.p_c_grid; - const auto a_b_k0_m_k1_grid_desc = MakeAGridDescriptor_KBatch_K0_M_K1( - karg.M, karg.MPadded, karg.K, karg.StrideA, karg.k_batch, karg.K0Padded, karg.KPadded); - const auto b_b_k0_n_k1_grid_desc = MakeBGridDescriptor_KBatch_K0_N_K1( - karg.K, karg.NPadded, karg.N, karg.StrideB, karg.k_batch, karg.K0Padded, karg.KPadded); - const auto c_grid_desc_m_n = MakeCGridDescriptor_M_N(karg.M, karg.N, karg.StrideC); - - const auto c_grid_desc_mblock_mperblock_nblock_nperblock = - MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(c_grid_desc_m_n); - - const auto a_grid_buf = make_dynamic_buffer( - p_a_grid, a_b_k0_m_k1_grid_desc.GetElementSpaceSize()); - const auto b_grid_buf = make_dynamic_buffer( - p_b_grid, b_b_k0_n_k1_grid_desc.GetElementSpaceSize()); - auto c_grid_buf = make_dynamic_buffer( - p_c_grid, c_grid_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); - - // divide block work by [KBatch, M, N] - const auto block_work_idx = - block_2_ctile_map.CalculateBottomIndex(make_multi_index(get_block_1d_id())); - - if(!block_2_ctile_map.ValidCTileIndex( - block_work_idx, - make_tuple(c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I0), - c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I2)))) + for(auto i = 0; i < 1500; i++) { - return; - } + const FloatA* p_a_grid = karg.p_a_grid; + const FloatB* p_b_grid = karg.p_b_grid; + FloatC* p_c_grid = karg.p_c_grid; + const auto a_b_k0_m_k1_grid_desc = MakeAGridDescriptor_KBatch_K0_M_K1(karg.M, + karg.MPadded, + karg.K, + karg.StrideA, + karg.k_batch, + karg.K0Padded, + karg.KPadded); + const auto b_b_k0_n_k1_grid_desc = MakeBGridDescriptor_KBatch_K0_N_K1(karg.K, + karg.NPadded, + karg.N, + karg.StrideB, + karg.k_batch, + karg.K0Padded, + karg.KPadded); + const auto c_grid_desc_m_n = MakeCGridDescriptor_M_N(karg.M, karg.N, karg.StrideC); - const index_t block_m_id = __builtin_amdgcn_readfirstlane(block_work_idx[I1]); - const index_t block_n_id = __builtin_amdgcn_readfirstlane(block_work_idx[I2]); - const index_t k_batch_id = __builtin_amdgcn_readfirstlane(block_work_idx[I0]); + const auto c_grid_desc_mblock_mperblock_nblock_nperblock = + MakeCGridDesc_MBlock_MPerBlock_NBlock_NPerBlock(c_grid_desc_m_n); - // HACK: this force m/n_block_data_idx_on_grid into SGPR - const index_t m_block_data_idx_on_grid = - __builtin_amdgcn_readfirstlane(block_m_id * MPerBlock); + const auto a_grid_buf = make_dynamic_buffer( + p_a_grid, a_b_k0_m_k1_grid_desc.GetElementSpaceSize()); + const auto b_grid_buf = make_dynamic_buffer( + p_b_grid, b_b_k0_n_k1_grid_desc.GetElementSpaceSize()); + auto c_grid_buf = make_dynamic_buffer( + p_c_grid, c_grid_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); - const index_t n_block_data_idx_on_grid = - __builtin_amdgcn_readfirstlane(block_n_id * NPerBlock); + // divide block work by [KBatch, M, N] + const auto block_work_idx = + block_2_ctile_map.CalculateBottomIndex(make_multi_index(get_block_1d_id())); - // lds max alignment - constexpr auto max_lds_align = K1; - - // A matrix in LDS memory, dst of blockwise copy - constexpr auto a_k0_m_k1_block_desc = [&]() { - if constexpr(ABlockLdsExtraM) + if(!block_2_ctile_map.ValidCTileIndex( + block_work_idx, + make_tuple(c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I0), + c_grid_desc_mblock_mperblock_nblock_nperblock.GetLength(I2)))) { - return make_naive_tensor_descriptor( - make_tuple(Number{}, Number{}, K1), - make_tuple(Number{} * K1, K1, I1)); + return; } - else + + const index_t block_m_id = __builtin_amdgcn_readfirstlane(block_work_idx[I1]); + const index_t block_n_id = __builtin_amdgcn_readfirstlane(block_work_idx[I2]); + const index_t k_batch_id = __builtin_amdgcn_readfirstlane(block_work_idx[I0]); + + // HACK: this force m/n_block_data_idx_on_grid into SGPR + const index_t m_block_data_idx_on_grid = + __builtin_amdgcn_readfirstlane(block_m_id * MPerBlock); + + const index_t n_block_data_idx_on_grid = + __builtin_amdgcn_readfirstlane(block_n_id * NPerBlock); + + // lds max alignment + constexpr auto max_lds_align = K1; + + // A matrix in LDS memory, dst of blockwise copy + constexpr auto a_k0_m_k1_block_desc = [&]() { + if constexpr(ABlockLdsExtraM) + { + return make_naive_tensor_descriptor( + make_tuple(Number{}, Number{}, K1), + make_tuple(Number{} * K1, K1, I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number{}, Number{}, K1), max_lds_align); + } + }(); + + constexpr auto a_b_k0_m_k1_block_desc = [&]() { + if constexpr(ABlockLdsExtraM) + { + return make_naive_tensor_descriptor( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + make_tuple(Number{} * Number{} * K1, + Number{} * K1, + K1, + I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + max_lds_align); + } + }(); + // B matrix in LDS memory, dst of blockwise copy + constexpr auto b_k0_n_k1_block_desc = [&]() { + if constexpr(BBlockLdsExtraN) + { + return make_naive_tensor_descriptor( + make_tuple(Number{}, Number{}, K1), + make_tuple(Number{} * K1, K1, I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number{}, Number{}, K1), max_lds_align); + } + }(); + + constexpr auto b_b_k0_n_k1_block_desc = [&]() { + if constexpr(BBlockLdsExtraN) + { + return make_naive_tensor_descriptor( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + make_tuple(Number{} * Number{} * K1, + Number{} * K1, + K1, + I1)); + } + else + { + return make_naive_tensor_descriptor_aligned( + make_tuple(Number<1>{}, Number{}, Number{}, K1), + max_lds_align); + } + }(); + // A matrix blockwise copy + auto a_blockwise_copy = + ThreadGroupTensorSliceTransfer_v4r1, + ABlockTransferThreadClusterLengths_K0_M_K1, + ABlockTransferThreadClusterArrangeOrder, + FloatA, + LDSTypeA, + decltype(a_b_k0_m_k1_grid_desc), + decltype(a_b_k0_m_k1_block_desc), + ABlockTransferSrcAccessOrder, + Sequence<0, 2, 1, 3>, + ABlockTransferSrcVectorDim, + 3, + ABlockTransferSrcScalarPerVector, + ABlockTransferDstScalarPerVector_K1, + 1, + 1, + AThreadTransferSrcResetCoordinateAfterRun, + true>( + a_b_k0_m_k1_grid_desc, + make_multi_index(k_batch_id, 0, m_block_data_idx_on_grid, 0), + a_element_op, + a_b_k0_m_k1_block_desc, + make_multi_index(0, 0, 0, 0), + ck::tensor_operation::element_wise::PassThrough{}); + + // B matrix blockwise copy + auto b_blockwise_copy = + ThreadGroupTensorSliceTransfer_v4r1, + BBlockTransferThreadClusterLengths_K0_N_K1, + BBlockTransferThreadClusterArrangeOrder, + FloatB, + LDSTypeB, + decltype(b_b_k0_n_k1_grid_desc), + decltype(b_b_k0_n_k1_block_desc), + BBlockTransferSrcAccessOrder, + Sequence<0, 2, 1, 3>, + BBlockTransferSrcVectorDim, + 3, + BBlockTransferSrcScalarPerVector, + BBlockTransferDstScalarPerVector_K1, + 1, + 1, + BThreadTransferSrcResetCoordinateAfterRun, + true>( + b_b_k0_n_k1_grid_desc, + make_multi_index(k_batch_id, 0, n_block_data_idx_on_grid, 0), + b_element_op, + b_b_k0_n_k1_block_desc, + make_multi_index(0, 0, 0, 0), + ck::tensor_operation::element_wise::PassThrough{}); + + // GEMM definition + // c_mtx += transpose(a_mtx) * b_mtx + // a_mtx[K0PerBlock, MPerBlock] is in LDS + // b_mtx[K0PerBlock, NPerBlock] is in LDS + // c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in + // register + // sanity check + + auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector< + BlockSize, + LDSTypeA, + LDSTypeB, + FloatAcc, + decltype(a_k0_m_k1_block_desc), + decltype(b_k0_n_k1_block_desc), + MPerXDL, + NPerXDL, + MRepeat, + NRepeat, + K1, + LoopSched, + ComputeTypeA, + ComputeTypeB>(); + + auto c_thread_buf = blockwise_gemm.GetCThreadBuffer(); + + // LDS allocation for A and B: be careful of alignment + constexpr auto a_block_space_size = math::integer_least_multiple( + a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align); + + auto p_a_block = reinterpret_cast(p_shared_block); + auto p_b_block = reinterpret_cast(p_a_block + a_block_space_size); + + constexpr auto a_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); + constexpr auto b_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); + + auto a_block_buf = make_dynamic_buffer( + p_a_block, a_k0_m_k1_block_desc.GetElementSpaceSize()); + auto b_block_buf = make_dynamic_buffer( + p_b_block, b_k0_n_k1_block_desc.GetElementSpaceSize()); + + // gridwise GEMM pipeline + const index_t num_k_block_main_loop = __builtin_amdgcn_readfirstlane( + (a_b_k0_m_k1_grid_desc.GetLength(I1) * a_b_k0_m_k1_grid_desc.GetLength(I3)) / + (K0PerBlock * K1)); + + const auto gridwise_gemm_pipeline = GridwiseGemmPipe{}; + + gridwise_gemm_pipeline.template Run(a_b_k0_m_k1_grid_desc, + a_b_k0_m_k1_block_desc, + a_blockwise_copy, + a_grid_buf, + a_block_buf, + a_block_slice_copy_step, + b_b_k0_n_k1_grid_desc, + b_b_k0_n_k1_block_desc, + b_blockwise_copy, + b_grid_buf, + b_block_buf, + b_block_slice_copy_step, + blockwise_gemm, + c_thread_buf, + num_k_block_main_loop); + + // output: register to global memory { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number{}, Number{}, K1), max_lds_align); - } - }(); + constexpr index_t MWave = MPerBlock / (MRepeat * MPerXDL); + constexpr index_t NWave = NPerBlock / (NRepeat * NPerXDL); - constexpr auto a_b_k0_m_k1_block_desc = [&]() { - if constexpr(ABlockLdsExtraM) - { - return make_naive_tensor_descriptor( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - make_tuple(Number{} * Number{} * K1, - Number{} * K1, - K1, - I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - max_lds_align); - } - }(); - // B matrix in LDS memory, dst of blockwise copy - constexpr auto b_k0_n_k1_block_desc = [&]() { - if constexpr(BBlockLdsExtraN) - { - return make_naive_tensor_descriptor( - make_tuple(Number{}, Number{}, K1), - make_tuple(Number{} * K1, K1, I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number{}, Number{}, K1), max_lds_align); - } - }(); + constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc = + blockwise_gemm.GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); - constexpr auto b_b_k0_n_k1_block_desc = [&]() { - if constexpr(BBlockLdsExtraN) - { - return make_naive_tensor_descriptor( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - make_tuple(Number{} * Number{} * K1, - Number{} * K1, - K1, - I1)); - } - else - { - return make_naive_tensor_descriptor_aligned( - make_tuple(Number<1>{}, Number{}, Number{}, K1), - max_lds_align); - } - }(); - // A matrix blockwise copy - auto a_blockwise_copy = - ThreadGroupTensorSliceTransfer_v4r1, - ABlockTransferThreadClusterLengths_K0_M_K1, - ABlockTransferThreadClusterArrangeOrder, - FloatA, - LDSTypeA, - decltype(a_b_k0_m_k1_grid_desc), - decltype(a_b_k0_m_k1_block_desc), - ABlockTransferSrcAccessOrder, - Sequence<0, 2, 1, 3>, - ABlockTransferSrcVectorDim, - 3, - ABlockTransferSrcScalarPerVector, - ABlockTransferDstScalarPerVector_K1, - 1, - 1, - AThreadTransferSrcResetCoordinateAfterRun, - true>( - a_b_k0_m_k1_grid_desc, - make_multi_index(k_batch_id, 0, m_block_data_idx_on_grid, 0), - a_element_op, - a_b_k0_m_k1_block_desc, - make_multi_index(0, 0, 0, 0), - ck::tensor_operation::element_wise::PassThrough{}); + constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc = + blockwise_gemm.GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); - // B matrix blockwise copy - auto b_blockwise_copy = - ThreadGroupTensorSliceTransfer_v4r1, - BBlockTransferThreadClusterLengths_K0_N_K1, - BBlockTransferThreadClusterArrangeOrder, - FloatB, - LDSTypeB, - decltype(b_b_k0_n_k1_grid_desc), - decltype(b_b_k0_n_k1_block_desc), - BBlockTransferSrcAccessOrder, - Sequence<0, 2, 1, 3>, - BBlockTransferSrcVectorDim, - 3, - BBlockTransferSrcScalarPerVector, - BBlockTransferDstScalarPerVector_K1, - 1, - 1, - BThreadTransferSrcResetCoordinateAfterRun, - true>( - b_b_k0_n_k1_grid_desc, - make_multi_index(k_batch_id, 0, n_block_data_idx_on_grid, 0), - b_element_op, - b_b_k0_n_k1_block_desc, - make_multi_index(0, 0, 0, 0), - ck::tensor_operation::element_wise::PassThrough{}); + constexpr auto M0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I0); + constexpr auto N0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I1); + constexpr auto M1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I2); + constexpr auto N1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I3); + constexpr auto M2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I4); + constexpr auto M3 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I5); + constexpr auto M4 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I6); + constexpr auto N2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I7); - // GEMM definition - // c_mtx += transpose(a_mtx) * b_mtx - // a_mtx[K0PerBlock, MPerBlock] is in LDS - // b_mtx[K0PerBlock, NPerBlock] is in LDS - // c_mtx[MPerBlock, NPerBlock] is distributed among threads, and saved in - // register - // sanity check + constexpr auto c_block_desc_mblock_mperblock_nblock_nperblock = + GetCBlockDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(); - auto blockwise_gemm = BlockwiseGemmXdlops_k0mk1_k0nk1_m0n0m1n1m2m3m4n2_Selector< - BlockSize, - LDSTypeA, - LDSTypeB, - FloatAcc, - decltype(a_k0_m_k1_block_desc), - decltype(b_k0_n_k1_block_desc), - MPerXDL, - NPerXDL, - MRepeat, - NRepeat, - K1, - LoopSched, - ComputeTypeA, - ComputeTypeB>(); + auto c_block_buf = make_dynamic_buffer( + static_cast(p_shared_block), + c_block_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); - auto c_thread_buf = blockwise_gemm.GetCThreadBuffer(); + constexpr auto c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2 = transform_tensor_descriptor( + c_block_desc_mblock_mperblock_nblock_nperblock, + make_tuple(make_freeze_transform(I0), // freeze mblock + make_unmerge_transform( + make_tuple(CShuffleMRepeatPerShuffle, + M1, + M2, + M3, + M4)), // M1 = MWave, M2 * M3 * M4 = MPerXDL + make_freeze_transform(I0), // freeze nblock + make_unmerge_transform( + make_tuple(CShuffleNRepeatPerShuffle, + N1, + N2))), // M1 = MWave, M2 * M3 * M4 = MPerXDL + make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), + make_tuple(Sequence<>{}, + Sequence<0, 2, 4, 5, 6>{}, + Sequence<>{}, + Sequence<1, 3, 7>{})); - // LDS allocation for A and B: be careful of alignment - constexpr auto a_block_space_size = - math::integer_least_multiple(a_k0_m_k1_block_desc.GetElementSpaceSize(), max_lds_align); + // calculate origin of thread output tensor on global memory + // blockwise GEMM c matrix starting index + const auto c_thread_mtx_on_block = + blockwise_gemm.CalculateCThreadOriginDataIndex(I0, I0, I0, I0); - auto p_a_block = reinterpret_cast(p_shared_block); - auto p_b_block = reinterpret_cast(p_a_block + a_block_space_size); + const index_t m_thread_data_on_block = c_thread_mtx_on_block[I0]; + const index_t n_thread_data_on_block = c_thread_mtx_on_block[I1]; - constexpr auto a_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); - constexpr auto b_block_slice_copy_step = make_multi_index(0, K0PerBlock, 0, 0); + const auto m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor = + make_single_stage_tensor_adaptor( + make_tuple(make_merge_transform(make_tuple(M0, M1, M2, M3, M4))), + make_tuple(Sequence<0, 1, 2, 3, 4>{}), + make_tuple(Sequence<0>{})); - auto a_block_buf = make_dynamic_buffer( - p_a_block, a_k0_m_k1_block_desc.GetElementSpaceSize()); - auto b_block_buf = make_dynamic_buffer( - p_b_block, b_k0_n_k1_block_desc.GetElementSpaceSize()); + const auto m_thread_data_on_block_idx = + m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor.CalculateBottomIndex( + make_multi_index(m_thread_data_on_block)); - // gridwise GEMM pipeline - const index_t num_k_block_main_loop = __builtin_amdgcn_readfirstlane( - (a_b_k0_m_k1_grid_desc.GetLength(I1) * a_b_k0_m_k1_grid_desc.GetLength(I3)) / - (K0PerBlock * K1)); + const auto n_thread_data_on_block_to_n0_n1_n2_adaptor = + make_single_stage_tensor_adaptor( + make_tuple(make_merge_transform(make_tuple(N0, N1, N2))), + make_tuple(Sequence<0, 1, 2>{}), + make_tuple(Sequence<0>{})); - const auto gridwise_gemm_pipeline = GridwiseGemmPipe{}; + const auto n_thread_data_on_block_idx = + n_thread_data_on_block_to_n0_n1_n2_adaptor.CalculateBottomIndex( + make_multi_index(n_thread_data_on_block)); - gridwise_gemm_pipeline.template Run(a_b_k0_m_k1_grid_desc, - a_b_k0_m_k1_block_desc, - a_blockwise_copy, - a_grid_buf, - a_block_buf, - a_block_slice_copy_step, - b_b_k0_n_k1_grid_desc, - b_b_k0_n_k1_block_desc, - b_blockwise_copy, - b_grid_buf, - b_block_buf, - b_block_slice_copy_step, - blockwise_gemm, - c_thread_buf, - num_k_block_main_loop); + // VGPR to LDS + auto c_thread_copy_vgpr_to_lds = ThreadwiseTensorSliceTransfer_v1r3< + FloatAcc, + FloatC, + decltype(c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc), + decltype(c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2), + ck::tensor_operation::element_wise::PassThrough, + Sequence, + Sequence<0, 1, 2, 3, 4, 5, 6, 7>, + 7, + 1, + InMemoryDataOperationEnum::Set, + 1, + true>{c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, + make_multi_index(0, + 0, + m_thread_data_on_block_idx[I1], + n_thread_data_on_block_idx[I1], + m_thread_data_on_block_idx[I2], + m_thread_data_on_block_idx[I3], + m_thread_data_on_block_idx[I4], + n_thread_data_on_block_idx[I2]), + ck::tensor_operation::element_wise::PassThrough{}}; - // output: register to global memory - { - constexpr index_t MWave = MPerBlock / (MRepeat * MPerXDL); - constexpr index_t NWave = NPerBlock / (NRepeat * NPerXDL); + // LDS to global + auto c_block_copy_lds_to_global = ThreadGroupTensorSliceTransfer_v6r1< + ThisThreadBlock, // index_t BlockSize, + CElementwiseOperation, // ElementwiseOperation, + CGlobalMemoryDataOperation, // DstInMemOp, + Sequence<1, + CShuffleMRepeatPerShuffle * MWave * MPerXDL, + 1, + CShuffleNRepeatPerShuffle * NWave * NPerXDL>, // BlockSliceLengths, + CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, + Sequence<0, 1, 2, 3>, // typename ThreadClusterArrangeOrder, + FloatC, // typename SrcData, + FloatC, // typename DstData, + decltype(c_block_desc_mblock_mperblock_nblock_nperblock), + decltype(c_grid_desc_mblock_mperblock_nblock_nperblock), + Sequence<0, 1, 2, 3>, // typename DimAccessOrder, + 3, // index_t VectorDim, + CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector, + true, // bool ThreadTransferSrcResetCoordinateAfterRun, + false> // bool ThreadTransferDstResetCoordinateAfterRun + {c_block_desc_mblock_mperblock_nblock_nperblock, + make_multi_index(0, 0, 0, 0), + c_grid_desc_mblock_mperblock_nblock_nperblock, + make_multi_index(block_m_id, 0, block_n_id, 0), + c_element_op}; - constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc = - blockwise_gemm.GetCBlockDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); + constexpr auto mxdlperwave_forward_step = + make_multi_index(0, CShuffleMRepeatPerShuffle * MWave * MPerXDL, 0, 0); + constexpr auto nxdlperwave_forward_step = + make_multi_index(0, 0, 0, CShuffleNRepeatPerShuffle * NWave * NPerXDL); + constexpr auto nxdlperwave_backward_step = + make_multi_index(0, 0, 0, -CShuffleNRepeatPerShuffle * NWave * NPerXDL); - constexpr auto c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc = - blockwise_gemm.GetCThreadDescriptor_M0_N0_M1_N1_M2_M3_M4_N2(); + static_for<0, MRepeat, CShuffleMRepeatPerShuffle>{}([&](auto mxdlperwave_iter) { + constexpr auto mxdlperwave = mxdlperwave_iter; - constexpr auto M0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I0); - constexpr auto N0 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I1); - constexpr auto M1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I2); - constexpr auto N1 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I3); - constexpr auto M2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I4); - constexpr auto M3 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I5); - constexpr auto M4 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I6); - constexpr auto N2 = c_m0_n0_m1_n1_m2_m3_m4_n2_block_desc.GetLength(I7); + static_for<0, NRepeat, CShuffleNRepeatPerShuffle>{}([&](auto nxdlperwave_iter) { + constexpr bool nxdlperwave_forward_sweep = + (mxdlperwave % (2 * CShuffleMRepeatPerShuffle) == 0); - constexpr auto c_block_desc_mblock_mperblock_nblock_nperblock = - GetCBlockDescriptor_MBlock_MPerBlock_NBlock_NPerBlock(); + constexpr index_t nxdlperwave_value = + nxdlperwave_forward_sweep + ? nxdlperwave_iter + : (NRepeat - nxdlperwave_iter - CShuffleNRepeatPerShuffle); - auto c_block_buf = make_dynamic_buffer( - static_cast(p_shared_block), - c_block_desc_mblock_mperblock_nblock_nperblock.GetElementSpaceSize()); + constexpr auto nxdlperwave = Number{}; - constexpr auto c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2 = transform_tensor_descriptor( - c_block_desc_mblock_mperblock_nblock_nperblock, - make_tuple( - make_freeze_transform(I0), // freeze mblock - make_unmerge_transform(make_tuple(CShuffleMRepeatPerShuffle, - M1, - M2, - M3, - M4)), // M1 = MWave, M2 * M3 * M4 = MPerXDL - make_freeze_transform(I0), // freeze nblock - make_unmerge_transform(make_tuple(CShuffleNRepeatPerShuffle, - N1, - N2))), // M1 = MWave, M2 * M3 * M4 = MPerXDL - make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}), - make_tuple( - Sequence<>{}, Sequence<0, 2, 4, 5, 6>{}, Sequence<>{}, Sequence<1, 3, 7>{})); + // make sure it's safe to do ds_write + block_sync_lds(); - // calculate origin of thread output tensor on global memory - // blockwise GEMM c matrix starting index - const auto c_thread_mtx_on_block = - blockwise_gemm.CalculateCThreadOriginDataIndex(I0, I0, I0, I0); + // VGPR to LDS + c_thread_copy_vgpr_to_lds.Run( + c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc, + make_tuple(mxdlperwave, nxdlperwave, I0, I0, I0, I0, I0, I0), + c_thread_buf, + c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, + c_block_buf); - const index_t m_thread_data_on_block = c_thread_mtx_on_block[I0]; - const index_t n_thread_data_on_block = c_thread_mtx_on_block[I1]; + // make sure it's safe to do ds_read + block_sync_lds(); - const auto m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor = - make_single_stage_tensor_adaptor( - make_tuple(make_merge_transform(make_tuple(M0, M1, M2, M3, M4))), - make_tuple(Sequence<0, 1, 2, 3, 4>{}), - make_tuple(Sequence<0>{})); + // LDS to global + c_block_copy_lds_to_global.Run( + c_block_desc_mblock_mperblock_nblock_nperblock, + c_block_buf, + c_grid_desc_mblock_mperblock_nblock_nperblock, + c_grid_buf); - const auto m_thread_data_on_block_idx = - m_thread_data_on_block_to_m0_m1_m2_m3_m4_adaptor.CalculateBottomIndex( - make_multi_index(m_thread_data_on_block)); + // move on nxdlperwave dimension + if constexpr(nxdlperwave_forward_sweep && + (nxdlperwave < NRepeat - CShuffleNRepeatPerShuffle)) + { + c_block_copy_lds_to_global.MoveDstSliceWindow( + c_grid_desc_mblock_mperblock_nblock_nperblock, + nxdlperwave_forward_step); + } + else if constexpr((!nxdlperwave_forward_sweep) && (nxdlperwave > 0)) + { + c_block_copy_lds_to_global.MoveDstSliceWindow( + c_grid_desc_mblock_mperblock_nblock_nperblock, + nxdlperwave_backward_step); + } + }); - const auto n_thread_data_on_block_to_n0_n1_n2_adaptor = - make_single_stage_tensor_adaptor( - make_tuple(make_merge_transform(make_tuple(N0, N1, N2))), - make_tuple(Sequence<0, 1, 2>{}), - make_tuple(Sequence<0>{})); - - const auto n_thread_data_on_block_idx = - n_thread_data_on_block_to_n0_n1_n2_adaptor.CalculateBottomIndex( - make_multi_index(n_thread_data_on_block)); - - // VGPR to LDS - auto c_thread_copy_vgpr_to_lds = - ThreadwiseTensorSliceTransfer_v1r3, - Sequence<0, 1, 2, 3, 4, 5, 6, 7>, - 7, - 1, - InMemoryDataOperationEnum::Set, - 1, - true>{ - c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, - make_multi_index(0, - 0, - m_thread_data_on_block_idx[I1], - n_thread_data_on_block_idx[I1], - m_thread_data_on_block_idx[I2], - m_thread_data_on_block_idx[I3], - m_thread_data_on_block_idx[I4], - n_thread_data_on_block_idx[I2]), - ck::tensor_operation::element_wise::PassThrough{}}; - - // LDS to global - auto c_block_copy_lds_to_global = ThreadGroupTensorSliceTransfer_v6r1< - ThisThreadBlock, // index_t BlockSize, - CElementwiseOperation, // ElementwiseOperation, - CGlobalMemoryDataOperation, // DstInMemOp, - Sequence<1, - CShuffleMRepeatPerShuffle * MWave * MPerXDL, - 1, - CShuffleNRepeatPerShuffle * NWave * NPerXDL>, // BlockSliceLengths, - CBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock, - Sequence<0, 1, 2, 3>, // typename ThreadClusterArrangeOrder, - FloatC, // typename SrcData, - FloatC, // typename DstData, - decltype(c_block_desc_mblock_mperblock_nblock_nperblock), - decltype(c_grid_desc_mblock_mperblock_nblock_nperblock), - Sequence<0, 1, 2, 3>, // typename DimAccessOrder, - 3, // index_t VectorDim, - CBlockTransferScalarPerVector_NWaveNPerXDL, // index_t ScalarPerVector, - true, // bool ThreadTransferSrcResetCoordinateAfterRun, - false> // bool ThreadTransferDstResetCoordinateAfterRun - {c_block_desc_mblock_mperblock_nblock_nperblock, - make_multi_index(0, 0, 0, 0), - c_grid_desc_mblock_mperblock_nblock_nperblock, - make_multi_index(block_m_id, 0, block_n_id, 0), - c_element_op}; - - constexpr auto mxdlperwave_forward_step = - make_multi_index(0, CShuffleMRepeatPerShuffle * MWave * MPerXDL, 0, 0); - constexpr auto nxdlperwave_forward_step = - make_multi_index(0, 0, 0, CShuffleNRepeatPerShuffle * NWave * NPerXDL); - constexpr auto nxdlperwave_backward_step = - make_multi_index(0, 0, 0, -CShuffleNRepeatPerShuffle * NWave * NPerXDL); - - static_for<0, MRepeat, CShuffleMRepeatPerShuffle>{}([&](auto mxdlperwave_iter) { - constexpr auto mxdlperwave = mxdlperwave_iter; - - static_for<0, NRepeat, CShuffleNRepeatPerShuffle>{}([&](auto nxdlperwave_iter) { - constexpr bool nxdlperwave_forward_sweep = - (mxdlperwave % (2 * CShuffleMRepeatPerShuffle) == 0); - - constexpr index_t nxdlperwave_value = - nxdlperwave_forward_sweep - ? nxdlperwave_iter - : (NRepeat - nxdlperwave_iter - CShuffleNRepeatPerShuffle); - - constexpr auto nxdlperwave = Number{}; - - // make sure it's safe to do ds_write - block_sync_lds(); - - // VGPR to LDS - c_thread_copy_vgpr_to_lds.Run( - c_m0_n0_m1_n1_m2_m3_m4_n2_thread_desc, - make_tuple(mxdlperwave, nxdlperwave, I0, I0, I0, I0, I0, I0), - c_thread_buf, - c_block_desc_m0_n0_m1_n1_m2_m3_m4_n2, - c_block_buf); - - // make sure it's safe to do ds_read - block_sync_lds(); - - // LDS to global - c_block_copy_lds_to_global.Run(c_block_desc_mblock_mperblock_nblock_nperblock, - c_block_buf, - c_grid_desc_mblock_mperblock_nblock_nperblock, - c_grid_buf); - - // move on nxdlperwave dimension - if constexpr(nxdlperwave_forward_sweep && - (nxdlperwave < NRepeat - CShuffleNRepeatPerShuffle)) + // move on mxdlperwave dimension + if constexpr(mxdlperwave < MRepeat - CShuffleMRepeatPerShuffle) { c_block_copy_lds_to_global.MoveDstSliceWindow( c_grid_desc_mblock_mperblock_nblock_nperblock, - nxdlperwave_forward_step); - } - else if constexpr((!nxdlperwave_forward_sweep) && (nxdlperwave > 0)) - { - c_block_copy_lds_to_global.MoveDstSliceWindow( - c_grid_desc_mblock_mperblock_nblock_nperblock, - nxdlperwave_backward_step); + mxdlperwave_forward_step); } }); - - // move on mxdlperwave dimension - if constexpr(mxdlperwave < MRepeat - CShuffleMRepeatPerShuffle) - { - c_block_copy_lds_to_global.MoveDstSliceWindow( - c_grid_desc_mblock_mperblock_nblock_nperblock, mxdlperwave_forward_step); - } - }); + } } } diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_gemm_xdlops_v3r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_permute.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_buffer_value.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_set_multiple_buffer_value.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_softmax.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_sparse_embeddings_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_data.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_gamma_beta.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_naive_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_selector.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_1st.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_splitk_2nd.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/normalization/gridwise_normalization_welford_variance.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp b/include/ck/tensor_operation/gpu/thread/reduction_functions_threadwise.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_contraction_dl.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_gemm_dlops_v3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_set.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1_dequant.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v3r1_dequant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v4r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v5r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r1r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v6r3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_tensor_slice_transfer_v7r2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp b/include/ck/tensor_operation/gpu/thread/threadwise_welford.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp b/include/ck/tensor_operation/gpu/warp/dpp_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm_arraybase.hpp b/include/ck/tensor_operation/operator_transform/transform_contraction_to_gemm_arraybase.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_bwd_data_to_gemm_v1.hpp old mode 100644 new mode 100755 diff --git a/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp b/include/ck/tensor_operation/operator_transform/transform_conv_fwd_to_gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_address_space.hpp b/include/ck/utility/amd_address_space.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_buffer_addressing.hpp b/include/ck/utility/amd_buffer_addressing.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_gemm_dpp.hpp b/include/ck/utility/amd_gemm_dpp.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_inline_asm.hpp b/include/ck/utility/amd_inline_asm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_lds.hpp b/include/ck/utility/amd_lds.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_wave_read_first_lane.hpp b/include/ck/utility/amd_wave_read_first_lane.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/array.hpp b/include/ck/utility/array.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/array_multi_index.hpp b/include/ck/utility/array_multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/c_style_pointer_cast.hpp b/include/ck/utility/c_style_pointer_cast.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/common_header.hpp b/include/ck/utility/common_header.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/container_element_picker.hpp b/include/ck/utility/container_element_picker.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/container_helper.hpp b/include/ck/utility/container_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/data_type.hpp b/include/ck/utility/data_type.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/debug.hpp b/include/ck/utility/debug.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/dynamic_buffer.hpp b/include/ck/utility/dynamic_buffer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/enable_if.hpp b/include/ck/utility/enable_if.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/f8_utils.hpp b/include/ck/utility/f8_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional.hpp b/include/ck/utility/functional.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional2.hpp b/include/ck/utility/functional2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional3.hpp b/include/ck/utility/functional3.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/functional4.hpp b/include/ck/utility/functional4.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/generic_memory_space_atomic.hpp b/include/ck/utility/generic_memory_space_atomic.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/get_id.hpp b/include/ck/utility/get_id.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/get_shift.hpp b/include/ck/utility/get_shift.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/ignore.hpp b/include/ck/utility/ignore.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/inner_product.hpp b/include/ck/utility/inner_product.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/inner_product_dpp8.hpp b/include/ck/utility/inner_product_dpp8.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/integral_constant.hpp b/include/ck/utility/integral_constant.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/is_detected.hpp b/include/ck/utility/is_detected.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/is_known_at_compile_time.hpp b/include/ck/utility/is_known_at_compile_time.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/loop_scheduler.hpp b/include/ck/utility/loop_scheduler.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/magic_division.hpp b/include/ck/utility/magic_division.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/math.hpp b/include/ck/utility/math.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/math_v2.hpp b/include/ck/utility/math_v2.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/multi_index.hpp b/include/ck/utility/multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/number.hpp b/include/ck/utility/number.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/random_gen.hpp b/include/ck/utility/random_gen.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_common.hpp b/include/ck/utility/reduction_common.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_enums.hpp b/include/ck/utility/reduction_enums.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_functions_accumulate.hpp b/include/ck/utility/reduction_functions_accumulate.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/reduction_operator.hpp b/include/ck/utility/reduction_operator.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/sequence.hpp b/include/ck/utility/sequence.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/sequence_helper.hpp b/include/ck/utility/sequence_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/span.hpp b/include/ck/utility/span.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/static_buffer.hpp b/include/ck/utility/static_buffer.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/statically_indexed_array.hpp b/include/ck/utility/statically_indexed_array.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/statically_indexed_array_multi_index.hpp b/include/ck/utility/statically_indexed_array_multi_index.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/synchronization.hpp b/include/ck/utility/synchronization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/thread_group.hpp b/include/ck/utility/thread_group.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/transpose_vectors.hpp b/include/ck/utility/transpose_vectors.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/tuple.hpp b/include/ck/utility/tuple.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/tuple_helper.hpp b/include/ck/utility/tuple_helper.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/type.hpp b/include/ck/utility/type.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/type_convert.hpp b/include/ck/utility/type_convert.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/workgroup_barrier.hpp b/include/ck/utility/workgroup_barrier.hpp old mode 100644 new mode 100755 diff --git a/include/ck/utility/workgroup_synchronization.hpp b/include/ck/utility/workgroup_synchronization.hpp old mode 100644 new mode 100755 diff --git a/include/ck/version.h.in b/include/ck/version.h.in old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/layout.hpp b/include/ck/wrapper/layout.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/operations/copy.hpp b/include/ck/wrapper/operations/copy.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/operations/gemm.hpp b/include/ck/wrapper/operations/gemm.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/tensor.hpp b/include/ck/wrapper/tensor.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/traits/blockwise_gemm_xdl_traits.hpp b/include/ck/wrapper/traits/blockwise_gemm_xdl_traits.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/kernel_utils.hpp b/include/ck/wrapper/utils/kernel_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/layout_utils.hpp b/include/ck/wrapper/utils/layout_utils.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/tensor_partition.hpp b/include/ck/wrapper/utils/tensor_partition.hpp old mode 100644 new mode 100755 diff --git a/include/ck/wrapper/utils/tensor_utils.hpp b/include/ck/wrapper/utils/tensor_utils.hpp old mode 100644 new mode 100755 diff --git a/library/CMakeLists.txt b/library/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_avgpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_cgemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_column_to_image.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_contraction.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_fpAintB_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_gemm_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_groupnorm_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_image_to_column.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_layernorm_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_reduce.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_softmax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp b/library/include/ck/library/reference_tensor_operation/cpu/reference_sparse_embedding3_forward_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp b/library/include/ck/library/reference_tensor_operation/gpu/naive_conv_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp b/library/include/ck/library/tensor_operation_instance/add_device_operation_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp b/library/include/ck/library/tensor_operation_instance/device_operation_instance_factory.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/avg_pool3d_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_bias_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_backward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp b/library/include/ck/library/tensor_operation_instance/gpu/batchnorm_infer.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction/device_contraction_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/contraction_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_column_to_image_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/conv_tensor_rearrange/device_image_to_column_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_backward_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/convolution_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_elementwise_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_mean_squaremean_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/elementwise_normalization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_add_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_multiply.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_silu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_add_silu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_multiply_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/gemm_streamk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_f16_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_f16_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_i8_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_wmma_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_bilinear_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_scale_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_data/device_grouped_conv_bwd_data_xdl_scale_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_dl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_wmma_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_bwd_weight/device_grouped_conv_bwd_weight_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_dl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_wmma_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_bilinear_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scale_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scale_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_ab_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_conv_fwd/device_grouped_conv_fwd_xdl_scaleadd_scaleadd_relu_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_data_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_backward_weight.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_bilinear.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_ab.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_convolution_forward_scaleadd_scaleadd_relu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_bias.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fastgelu.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp b/library/include/ck/library/tensor_operation_instance/gpu/grouped_gemm_fixed_nk.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_gamma_beta.hpp b/library/include/ck/library/tensor_operation_instance/gpu/groupnorm_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_data.hpp b/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_data.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_gamma_beta.hpp b/library/include/ck/library/tensor_operation_instance/gpu/layernorm_bwd_gamma_beta.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/max_pool_bwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd_swish.hpp b/library/include/ck/library/tensor_operation_instance/gpu/normalization_fwd_swish.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/permute_scale.hpp b/library/include/ck/library/tensor_operation_instance/gpu/permute_scale.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/permute_scale/device_permute_scale_instances.hpp b/library/include/ck/library/tensor_operation_instance/gpu/permute_scale/device_permute_scale_instances.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp b/library/include/ck/library/tensor_operation_instance/gpu/pool3d_fwd.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/gemm_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perchannel_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_bias_forward_perlayer_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perchannel_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp b/library/include/ck/library/tensor_operation_instance/gpu/quantization/grouped_convolution_forward_perlayer_quantization.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_impl_common.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp b/library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_type.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_type.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp b/library/include/ck/library/tensor_operation_instance/gpu/transpose/device_transpose_instance.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp b/library/include/ck/library/tensor_operation_instance/gpu/transpose_3d.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/algorithm.hpp b/library/include/ck/library/utility/algorithm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/check_err.hpp b/library/include/ck/library/utility/check_err.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/conv_common.hpp b/library/include/ck/library/utility/conv_common.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp b/library/include/ck/library/utility/convolution_host_tensor_descriptor_helper.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/convolution_parameter.hpp b/library/include/ck/library/utility/convolution_parameter.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/device_memory.hpp b/library/include/ck/library/utility/device_memory.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/fill.hpp b/library/include/ck/library/utility/fill.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_common_util.hpp b/library/include/ck/library/utility/host_common_util.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_gemm.hpp b/library/include/ck/library/utility/host_gemm.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_tensor.hpp b/library/include/ck/library/utility/host_tensor.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/host_tensor_generator.hpp b/library/include/ck/library/utility/host_tensor_generator.hpp old mode 100644 new mode 100755 index 6fd7ed8aa8..b55cfb6558 --- a/library/include/ck/library/utility/host_tensor_generator.hpp +++ b/library/include/ck/library/utility/host_tensor_generator.hpp @@ -142,6 +142,45 @@ struct GeneratorTensor_3 } }; +// template +// struct GeneratorTensor_3_control_entropy +// { + +// // constexpr static int fp32_exponent_size = 8; +// // constexpr static int fp32_mantissa_size = 23; +// constexpr static int fp16_exponent_size = 5; +// constexpr static int fp16_mantissa_size = 10; + +// mutable std::mt19937 gen{std::random_device{}()}; +// mutable std::uniform_int_distribution dis{0, 0xFFFF}; +// float MAX_FP32_NUM = 0xFFFFFFFF; +// float MAX_FP16_NUM = 0xFFFF; + +// template +// T operator()(Is...) const +// { +// uint16_t bits = dis(gen); // Generate 32 random bits + +// // Combine the bits into a floating-point number according to IEEE 754 format +// uint16_t sign = bits >> 15; // Get the sign bit +// uint16_t exponent = (bits >> 10) & 0x1F; // Get the exponent bits +// uint16_t mantissa = bits & 0x3FF; // Get the mantissa bits + +// if(exponent == 0x1F || exponent == 0) +// { +// // Avoid NaN and denormalized numbers +// return static_cast(0.0); +// } + +// // Shift the exponent to the correct position and set the sign bit + +// int16_t result = (sign << 15) | ((exponent - 15 + 0x1F) << 10) | +// (mantissa >> (10 - fp16_mantissa_size)); +// float v= static_cast(*reinterpret_cast(std::bitset<16>(result).to_string().c_str())) / MAX_FP16_NUM; +// printf("%0f,\t", v / MAX_FP16_NUM); +// return static_cast(vector_type_maker / MAX_FP16_NUM); +// } +// }; template <> struct GeneratorTensor_3 { diff --git a/library/include/ck/library/utility/iterator.hpp b/library/include/ck/library/utility/iterator.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/literals.hpp b/library/include/ck/library/utility/literals.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/numeric.hpp b/library/include/ck/library/utility/numeric.hpp old mode 100644 new mode 100755 diff --git a/library/include/ck/library/utility/ranges.hpp b/library/include/ck/library/utility/ranges.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/avg_pool3d_bwd_ndhwc_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/avg_pool3d_bwd/device_avg_pool3d_bwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_bf16_bf16_bf16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f16_f16_f16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_f32_f32_f32_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm/device_batched_gemm_xdl_int8_int8_int8_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_add_relu_gemm_add/device_batched_gemm_add_relu_gemm_add_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_bias_permute/device_batched_gemm_bias_permute_m2_n3_k1_xdl_c_shuffle_f16_f16_f16_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_gemm/device_batched_gemm_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gon_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gkm_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_f16_f16_f16_gmk_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gkm_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gkn_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_multi_d/device_batched_gemm_multi_d_dl_i8_i8_i8_gmk_gnk_gmn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gkm_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gkn_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_reduce/device_batched_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_gmk_gnk_gmn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm/device_batched_gemm_softmax_gemm_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_bias_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_bf16_bf16_bf16_bf16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp b/library/src/tensor_operation_instance/gpu/batched_gemm_softmax_gemm_permute/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle_f16_f16_f16_f16_gmk_gnk_gno_gmo_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_backward_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_forward_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp b/library/src/tensor_operation_instance/gpu/batchnorm/device_batchnorm_infer_f64_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/column_to_image/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gndhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gndhwc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnhwc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_gnwc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_ndhwgc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_ndhwgc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nhwgc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/column_to_image/device_column_to_image_nwgc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_bf16_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_f16_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_bf16_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_compute_f16_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_compute_f32_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_kknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_knnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mknn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_bilinear/device_contraction_bilinear_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_f64_mnnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/contraction_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_bf16_bf16_bf16_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f16_f16_f16_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_bf16_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_compute_f16_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f32_f32_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_compute_f32_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_kkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_knn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mkn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp b/library/src/tensor_operation_instance/gpu/contraction_scale/device_contraction_scale_m2_n2_k2_xdl_c_shuffle_f64_f64_f64_mnn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv1d_bwd_data/device_conv1d_bwd_data_xdl_nwc_kxc_nwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_dl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_bwd_data/device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_c_shuffle_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd/device_conv2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu/device_conv2d_fwd_xdl_c_shuffle_bias_relu_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv2d_fwd_bias_relu_add/device_conv2d_fwd_xdl_c_shuffle_bias_relu_add_nhwc_kyxc_nhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/conv3d_bwd_data/device_conv3d_bwd_data_xdl_ndhwc_kzyxc_ndhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise/device_normalize_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/elementwise_normalization/device_elementwise_normalization_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dl_i8_i8_i8_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_km_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_dpp_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_wmma_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_2_stage_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_bf16_bf16_bf16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f16_f8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_interwave_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v1_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_default_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_kn_mn_v2_padded_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_fp8_fp8_fp8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_c_shuffle_lds_direct_load_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_kn_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/km_nk_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_kn_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_add_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_default_pipeline_v2_opt_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_default_pipeline_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f16_f16_f16/mk_nk_mn_irregular_interwave_pipeline_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm/device_gemm_xdl_f64_f64_f64_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add/device_gemm_add_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_add_fastgelu/device_gemm_add_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_fastgelu/device_gemm_add_fastgelu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_multiply/device_gemm_add_multiply_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu/device_gemm_add_relu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_km_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_relu_add_layernorm/device_gemm_add_relu_add_xdl_c_shuffle_layernorm_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_add_silu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_bf16_i8_bf16_bf16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_add_silu/device_gemm_add_silu_xdl_c_shuffle_f16_i8_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bias_add_reduce/device_gemm_bias_add_mean_squaremean_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_wmma_c_shuffle_i8_i8_i8_i8_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_km_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_kn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_bilinear/device_gemm_bilinear_xdl_c_shuffle_f16_f16_f16_f16_mk_nk_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_fastgelu/device_gemm_fastgelu_xdl_c_shuffle_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f16_f16_f16_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_kn_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_multiply_add/device_gemm_multiply_add_xdl_c_shuffle_f16_f8_f32_f32_f16_mk_nk_mn_mn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_reduce/device_gemm_reduce_xdl_cshuffle_f16_f16_f16_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_splitk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_comp_fp8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_interwave_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v1_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_v2_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_interwave_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v1_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_v2_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_kpb128_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f16_fp8_f16_mk_nk_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_f32_f32_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v1_interwave_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_kn_mn_v2_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_fp8_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_splitk/device_gemm_xdl_splitk_lds_direct_load_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/gemm_streamk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/gemm_streamk/device_gemm_xdl_streamk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/dl/device_grouped_conv1d_bwd_weight_dl_nwgc_gkxc_nwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_bwd_weight/xdl/device_grouped_conv1d_bwd_weight_xdl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv1d_fwd/xdl/device_grouped_conv1d_fwd_xdl_gnwc_gkxc_gnwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/wmma/device_grouped_conv2d_bwd_data_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_data/xdl/device_grouped_conv2d_bwd_data_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/dl/device_grouped_conv2d_bwd_weight_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_bwd_weight/xdl/device_grouped_conv2d_bwd_weight_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/dl/device_grouped_conv2d_fwd_dl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_gnhwc_gkyxc_gnhwk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/wmma/device_grouped_conv2d_fwd_wmma_nhwgc_gkyxc_nhwgk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_gnhwc_gkyxc_gnhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv2d_fwd/xdl/device_grouped_conv2d_fwd_xdl_nhwgc_gkyxc_nhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/wmma/device_grouped_conv3d_bwd_data_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data/xdl/device_grouped_conv3d_bwd_data_xdl_ndhwgc_gkzyxc_ndhwgk_input_f16_comp_bf8_f8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_bilinear/xdl/device_grouped_conv3d_bwd_data_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_data_scale/xdl/device_grouped_conv3d_bwd_data_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/dl/device_grouped_conv3d_bwd_weight_dl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/wmma/device_grouped_conv3d_bwd_weight_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_bf8_fp8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_bwd_weight/xdl/device_grouped_conv3d_bwd_weight_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_gndhwc_gkzyxc_gndhwk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_f16_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_1x1s1p0_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/wmma/device_grouped_conv3d_fwd_wmma_ndhwgc_gkzyxc_ndhwgk_i8_oddc_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_gndhwc_gkzyxc_gndhwk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_comp_fp8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd/xdl/device_grouped_conv3d_fwd_xdl_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_bilinear/xdl/device_grouped_conv3d_fwd_xdl_bilinear_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scale/xdl/device_grouped_conv3d_fwd_xdl_scale_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_ab/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_ab_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_conv3d_fwd_scaleadd_scaleadd_relu/xdl/device_grouped_conv3d_fwd_xdl_scaleadd_scaleadd_relu_ndhwgc_gkzyxc_ndhwgk_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f16_f16_mk_nk_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f16_f8_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm/device_grouped_gemm_xdl_splitk_f8_f16_f16_mk_kn_mn_irregular_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_bias/device_grouped_gemm_xdl_fixed_nk_bias_f16_f16_f32_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fastgelu/device_grouped_gemm_fastgelu_xdl_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_bf16_i8_bf16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_f16_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_fp8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/grouped_gemm_fixed_nk/device_grouped_gemm_xdl_fixed_nk_f16_i8_f16_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/image_to_column/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gndhwc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnhwc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_gnwc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_ndhwgc_3d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nhwgc_2d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp b/library/src/tensor_operation_instance/gpu/image_to_column/device_image_to_column_nwgc_1d_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/max_pool_bwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/device_max_pool_bwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/max_pool_bwd/max_pool_bwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_groupnorm_bwd_data_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_groupnorm_bwd_data_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/device_layernorm2d_bwd_data_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_data/normalization_bwd_data_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_data/normalization_bwd_data_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_groupnorm_bwd_gamma_beta_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_groupnorm_bwd_gamma_beta_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/device_layernorm2d_bwd_gamma_beta_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/normalization_bwd_gamma_beta_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_bwd_gamma_beta/normalization_bwd_gamma_beta_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/normalization_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_f32_f32_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_groupnorm_fwd_swish_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm2d_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/device_layernorm4d_fwd_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/normalization_fwd/normalization_fwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/permute_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_1d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_1d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_2d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_2d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_3d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_3d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_4d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_4d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_5d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_5d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_6d_instances.cpp b/library/src/tensor_operation_instance/gpu/permute_scale/device_permute_scale_6d_instances.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/pool3d_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_avg_pool3d_fwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_bf16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f16_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/device_max_pool3d_fwd_ndhwc_f32_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp b/library/src/tensor_operation_instance/gpu/pool3d_fwd/pool_fwd_instance_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/quantization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/conv2d_quantization_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_bias_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_int8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_dl_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_bias_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_int8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perchannel_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/conv2d_fwd/device_conv2d_xdl_perlayer_quantization_int8_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_dl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_instance.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_km_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_kn_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/device_gemm_quantization_xdl_c_shuffle_i8_i8_i8_mk_nk_mn_instance.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp b/library/src/tensor_operation_instance/gpu/quantization/gemm/gemm_quantization_common.hpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_b16_f32_b16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f16_f16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f16_f32_f16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f32_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f32_f64_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_f64_f64_f64_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i32_i8_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise_i8_i8_i8_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_b16_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f16_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_b16_f32_b16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f16_f16_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f16_f32_f16_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp b/library/src/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank3_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f16_f16_instance_rank4_reduce4.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank3_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce1.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce2.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce3.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp b/library/src/tensor_operation_instance/gpu/softmax/device_softmax_f32_f32_instance_rank4_reduce4.cpp old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp b/library/src/tensor_operation_instance/gpu/transpose/device_transpose_instances_3d.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/CMakeLists.txt b/library/src/utility/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/library/src/utility/convolution_parameter.cpp b/library/src/utility/convolution_parameter.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/device_memory.cpp b/library/src/utility/device_memory.cpp old mode 100644 new mode 100755 diff --git a/library/src/utility/host_tensor.cpp b/library/src/utility/host_tensor.cpp old mode 100644 new mode 100755 diff --git a/profiler/CMakeLists.txt b/profiler/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/profiler/README.md b/profiler/README.md old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/data_type_enum.hpp b/profiler/include/profiler/data_type_enum.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_avg_pool3d_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp b/profiler/include/profiler/profile_batched_gemm_add_relu_gemm_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_batched_gemm_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp b/profiler/include/profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_backward_impl.hpp b/profiler/include/profiler/profile_batchnorm_backward_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_forward_impl.hpp b/profiler/include/profiler/profile_batchnorm_forward_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_batchnorm_infer_impl.hpp b/profiler/include/profiler/profile_batchnorm_infer_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_contraction_impl.hpp b/profiler/include/profiler/profile_contraction_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_contraction_utils.hpp b/profiler/include/profiler/profile_contraction_utils.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_conv_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp b/profiler/include/profiler/profile_conv_fwd_bias_relu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_fwd_impl.hpp b/profiler/include/profiler/profile_conv_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp b/profiler/include/profiler/profile_conv_tensor_rearrange_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp b/profiler/include/profiler/profile_elementwise_layernorm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_add_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_add_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_impl.hpp b/profiler/include/profiler/profile_gemm_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp b/profiler/include/profiler/profile_gemm_add_multiply_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_add_layernorm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_relu_impl.hpp b/profiler/include/profiler/profile_gemm_add_relu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_add_silu_impl.hpp b/profiler/include/profiler/profile_gemm_add_silu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_bias_add_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_bilinear_impl.hpp b/profiler/include/profiler/profile_gemm_bilinear_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_gemm_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_impl.hpp b/profiler/include/profiler/profile_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp b/profiler/include/profiler/profile_gemm_multiply_add_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_reduce_impl.hpp b/profiler/include/profiler/profile_gemm_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_splitk_impl.hpp b/profiler/include/profiler/profile_gemm_splitk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_gemm_streamk_impl.hpp b/profiler/include/profiler/profile_gemm_streamk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp b/profiler/include/profiler/profile_grouped_conv_bwd_weight_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp b/profiler/include/profiler/profile_grouped_conv_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fastgelu_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_fixed_nk_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_grouped_gemm_impl.hpp b/profiler/include/profiler/profile_grouped_gemm_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_bwd_data_impl.hpp b/profiler/include/profiler/profile_groupnorm_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_bwd_gamma_beta_impl.hpp b/profiler/include/profiler/profile_groupnorm_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_groupnorm_fwd_impl.hpp b/profiler/include/profiler/profile_groupnorm_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_bwd_data_impl.hpp b/profiler/include/profiler/profile_layernorm_bwd_data_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_bwd_gamma_beta_impl.hpp b/profiler/include/profiler/profile_layernorm_bwd_gamma_beta_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_layernorm_fwd_impl.hpp b/profiler/include/profiler/profile_layernorm_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp b/profiler/include/profiler/profile_max_pool3d_bwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_permute_scale_impl.hpp b/profiler/include/profiler/profile_permute_scale_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_pool3d_fwd_impl.hpp b/profiler/include/profiler/profile_pool3d_fwd_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_reduce_impl.hpp b/profiler/include/profiler/profile_reduce_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_softmax_impl.hpp b/profiler/include/profiler/profile_softmax_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/include/profiler/profile_transpose_impl.hpp b/profiler/include/profiler/profile_transpose_impl.hpp old mode 100644 new mode 100755 diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/profiler/src/profile_avg_pool3d_bwd.cpp b/profiler/src/profile_avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm.cpp b/profiler/src/profile_batched_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp b/profiler/src/profile_batched_gemm_add_relu_gemm_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_gemm.cpp b/profiler/src/profile_batched_gemm_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_multi_d.cpp b/profiler/src/profile_batched_gemm_multi_d.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batched_gemm_reduce.cpp b/profiler/src/profile_batched_gemm_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_bwd.cpp b/profiler/src/profile_batchnorm_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_fwd.cpp b/profiler/src/profile_batchnorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_batchnorm_infer.cpp b/profiler/src/profile_batchnorm_infer.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_contraction_bilinear.cpp b/profiler/src/profile_contraction_bilinear.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_contraction_scale.cpp b/profiler/src/profile_contraction_scale.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_bwd_data.cpp b/profiler/src/profile_conv_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd.cpp b/profiler/src/profile_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd_bias_relu.cpp b/profiler/src/profile_conv_fwd_bias_relu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_fwd_bias_relu_add.cpp b/profiler/src/profile_conv_fwd_bias_relu_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_conv_tensor_rearrange.cpp b/profiler/src/profile_conv_tensor_rearrange.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm.cpp b/profiler/src/profile_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add.cpp b/profiler/src/profile_gemm_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_add_fastgelu.cpp b/profiler/src/profile_gemm_add_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_fastgelu.cpp b/profiler/src/profile_gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_multiply.cpp b/profiler/src/profile_gemm_add_multiply.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_relu.cpp b/profiler/src/profile_gemm_add_relu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_relu_add_layernorm.cpp b/profiler/src/profile_gemm_add_relu_add_layernorm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_add_silu.cpp b/profiler/src/profile_gemm_add_silu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_bias_add_reduce.cpp b/profiler/src/profile_gemm_bias_add_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_bilinear.cpp b/profiler/src/profile_gemm_bilinear.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_fastgelu.cpp b/profiler/src/profile_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_multiply_add.cpp b/profiler/src/profile_gemm_multiply_add.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_reduce.cpp b/profiler/src/profile_gemm_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_splitk.cpp b/profiler/src/profile_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_gemm_streamk.cpp b/profiler/src/profile_gemm_streamk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_bwd_data.cpp b/profiler/src/profile_grouped_conv_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_bwd_weight.cpp b/profiler/src/profile_grouped_conv_bwd_weight.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_conv_fwd.cpp b/profiler/src/profile_grouped_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm.cpp b/profiler/src/profile_grouped_gemm.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm_fastgelu.cpp b/profiler/src/profile_grouped_gemm_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_grouped_gemm_fixed_nk.cpp b/profiler/src/profile_grouped_gemm_fixed_nk.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_bwd_data.cpp b/profiler/src/profile_groupnorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_bwd_gamma_beta.cpp b/profiler/src/profile_groupnorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_groupnorm_fwd.cpp b/profiler/src/profile_groupnorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_bwd_data.cpp b/profiler/src/profile_layernorm_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_bwd_gamma_beta.cpp b/profiler/src/profile_layernorm_bwd_gamma_beta.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_layernorm_fwd.cpp b/profiler/src/profile_layernorm_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_max_pool3d_bwd.cpp b/profiler/src/profile_max_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_max_pool3d_fwd.cpp b/profiler/src/profile_max_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_permute_scale.cpp b/profiler/src/profile_permute_scale.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_reduce.cpp b/profiler/src/profile_reduce.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_softmax.cpp b/profiler/src/profile_softmax.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profile_transpose.cpp b/profiler/src/profile_transpose.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profiler.cpp b/profiler/src/profiler.cpp old mode 100644 new mode 100755 diff --git a/profiler/src/profiler_operation_registry.hpp b/profiler/src/profiler_operation_registry.hpp old mode 100644 new mode 100755 diff --git a/rbuild.ini b/rbuild.ini old mode 100644 new mode 100755 diff --git a/requirements.txt b/requirements.txt old mode 100644 new mode 100755 diff --git a/script/cmake-ck-dev.sh b/script/cmake-ck-dev.sh index 51d6f7a30c..13f5f08671 100755 --- a/script/cmake-ck-dev.sh +++ b/script/cmake-ck-dev.sh @@ -11,7 +11,7 @@ cmake -D CMAKE_CXX_FLAGS="-std=c++17 -O3 -ftemplate-backtrace-limit=0 -fPIE -Wno-gnu-line-marker" \ -D CMAKE_BUILD_TYPE=Release \ -D BUILD_DEV=ON \ --D GPU_TARGETS="gfx908;gfx90a;gfx940" \ +-D GPU_TARGETS="gfx942" \ -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \ -D USE_BITINT_EXTENSION_INT4=OFF \ ${MY_PROJECT_SOURCE} diff --git a/script/hip_fatbin_insert b/script/hip_fatbin_insert old mode 100644 new mode 100755 diff --git a/script/process_perf_data.py b/script/process_perf_data.py old mode 100644 new mode 100755 diff --git a/script/redis-cli.conf b/script/redis-cli.conf old mode 100644 new mode 100755 diff --git a/script/test_convnd_fwd.sh b/script/test_convnd_fwd.sh old mode 100644 new mode 100755 diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm/CMakeLists.txt b/test/batched_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm/test_batched_gemm.cpp b/test/batched_gemm/test_batched_gemm.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/CMakeLists.txt b/test/batched_gemm_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp b/test/batched_gemm_gemm/test_batched_gemm_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_multi_d/CMakeLists.txt b/test/batched_gemm_multi_d/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp b/test/batched_gemm_multi_d/test_batched_gemm_multi_d_dl.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_reduce/CMakeLists.txt b/test/batched_gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp b/test/batched_gemm_reduce/batched_gemm_reduce_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/CMakeLists.txt b/test/batched_gemm_softmax_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp b/test/batched_gemm_softmax_gemm/test_batched_gemm_softmax_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt b/test/batched_gemm_softmax_gemm_permute/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_bias_softmax_gemm_permute_util.hpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp b/test/batched_gemm_softmax_gemm_permute/test_batched_gemm_softmax_gemm_permute_util.hpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/CMakeLists.txt b/test/batchnorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_bwd_rank_4.cpp b/test/batchnorm/batchnorm_bwd_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_fwd_rank_4.cpp b/test/batchnorm/batchnorm_fwd_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/batchnorm/batchnorm_infer_rank_4.cpp b/test/batchnorm/batchnorm_infer_rank_4.cpp old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/block_swizzle_test.cpp b/test/block_swizzle_test/block_swizzle_test.cpp old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/rebuild.sh b/test/block_swizzle_test/rebuild.sh old mode 100644 new mode 100755 diff --git a/test/block_swizzle_test/simple_args.h b/test/block_swizzle_test/simple_args.h old mode 100644 new mode 100755 diff --git a/test/block_to_ctile_map/CMakeLists.txt b/test/block_to_ctile_map/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/block_to_ctile_map/test_block_to_ctile_map.cpp b/test/block_to_ctile_map/test_block_to_ctile_map.cpp old mode 100644 new mode 100755 diff --git a/test/contraction/CMakeLists.txt b/test/contraction/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/contraction/test_contraction.cpp b/test/contraction/test_contraction.cpp old mode 100644 new mode 100755 diff --git a/test/contraction/test_contraction_interface.cpp b/test/contraction/test_contraction_interface.cpp old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/CMakeLists.txt b/test/conv_tensor_rearrange/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange.cpp old mode 100644 new mode 100755 diff --git a/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp b/test/conv_tensor_rearrange/test_conv_tensor_rearrange_interface.cpp old mode 100644 new mode 100755 diff --git a/test/conv_util/CMakeLists.txt b/test/conv_util/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/conv_util/conv_util.cpp b/test/conv_util/conv_util.cpp old mode 100644 new mode 100755 diff --git a/test/convnd_bwd_data/CMakeLists.txt b/test/convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/convnd_bwd_data/convnd_bwd_data.cpp b/test/convnd_bwd_data/convnd_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/test/convnd_fwd/CMakeLists.txt b/test/convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/convnd_fwd/convnd_fwd.cpp b/test/convnd_fwd/convnd_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/CMakeLists.txt b/test/data_type/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/data_type/test_bf8.cpp b/test/data_type/test_bf8.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/test_fp8.cpp b/test/data_type/test_fp8.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/test_int4.cpp b/test/data_type/test_int4.cpp old mode 100644 new mode 100755 diff --git a/test/data_type/type_convert_const.cpp b/test/data_type/type_convert_const.cpp old mode 100644 new mode 100755 diff --git a/test/elementwise_normalization/CMakeLists.txt b/test/elementwise_normalization/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp b/test/elementwise_normalization/test_elementwise_layernorm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/CMakeLists.txt b/test/gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_bf16.cpp b/test/gemm/gemm_bf16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp16.cpp b/test/gemm/gemm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp32.cpp b/test/gemm/gemm_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_fp64.cpp b/test/gemm/gemm_fp64.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_int8.cpp b/test/gemm/gemm_int8.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_standalone_xdl_fp16.cpp b/test/gemm/gemm_standalone_xdl_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/gemm_util.hpp b/test/gemm/gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nn_instance.cpp b/test/gemm/instance/gemm_f16_nn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nn_instance.hpp b/test/gemm/instance/gemm_f16_nn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nt_instance.cpp b/test/gemm/instance/gemm_f16_nt_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_nt_instance.hpp b/test/gemm/instance/gemm_f16_nt_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tn_instance.cpp b/test/gemm/instance/gemm_f16_tn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tn_instance.hpp b/test/gemm/instance/gemm_f16_tn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tt_instance.cpp b/test/gemm/instance/gemm_f16_tt_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_f16_tt_instance.hpp b/test/gemm/instance/gemm_f16_tt_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.cpp old mode 100644 new mode 100755 diff --git a/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp b/test/gemm/instance/gemm_wavelet_f16_tn_instance.hpp old mode 100644 new mode 100755 diff --git a/test/gemm/run_gemm_test.inc b/test/gemm/run_gemm_test.inc old mode 100644 new mode 100755 diff --git a/test/gemm_add/CMakeLists.txt b/test/gemm_add/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add.hpp b/test/gemm_add/test_gemm_add.hpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_fastgelu.cpp b/test/gemm_add/test_gemm_add_fastgelu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_relu.cpp b/test/gemm_add/test_gemm_add_relu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_add/test_gemm_add_silu.cpp b/test/gemm_add/test_gemm_add_silu.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_layernorm/CMakeLists.txt b/test/gemm_layernorm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp b/test/gemm_layernorm/test_gemm_add_relu_add_layernorm_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_reduce/CMakeLists.txt b/test/gemm_reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_reduce/gemm_reduce_fp16.cpp b/test/gemm_reduce/gemm_reduce_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/CMakeLists.txt b/test/gemm_split_k/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk.cpp b/test/gemm_split_k/test_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk_ut_cases.inc b/test/gemm_split_k/test_gemm_splitk_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/gemm_split_k/test_gemm_splitk_util.hpp b/test/gemm_split_k/test_gemm_splitk_util.hpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/CMakeLists.txt b/test/grouped_convnd_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_wmma.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp b/test/grouped_convnd_bwd_data/test_grouped_convnd_bwd_data_interface_xdl.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/CMakeLists.txt b/test/grouped_convnd_bwd_weight/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_wmma.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp b/test/grouped_convnd_bwd_weight/test_grouped_convnd_bwd_weight_interface_xdl.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/CMakeLists.txt b/test/grouped_convnd_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_ab_interface.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_ab_interface.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp b/test/grouped_convnd_fwd/test_grouped_convnd_fwd_multi_d_interface_compatibility.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/CMakeLists.txt b/test/grouped_gemm/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_interface.cpp b/test/grouped_gemm/test_grouped_gemm_interface.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_splitk.cpp b/test/grouped_gemm/test_grouped_gemm_splitk.cpp old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_ut_cases.inc b/test/grouped_gemm/test_grouped_gemm_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/grouped_gemm/test_grouped_gemm_util.hpp b/test/grouped_gemm/test_grouped_gemm_util.hpp old mode 100644 new mode 100755 diff --git a/test/magic_number_division/CMakeLists.txt b/test/magic_number_division/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/magic_number_division/magic_number_division.cpp b/test/magic_number_division/magic_number_division.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/CMakeLists.txt b/test/normalization_bwd_data/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/test_groupnorm_bwd_data_fp32.cpp b/test/normalization_bwd_data/test_groupnorm_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_data/test_layernorm2d_bwd_data_fp32.cpp b/test/normalization_bwd_data/test_layernorm2d_bwd_data_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/CMakeLists.txt b/test/normalization_bwd_gamma_beta/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/test_groupnorm_bwd_gamma_beta_fp32.cpp b/test/normalization_bwd_gamma_beta/test_groupnorm_bwd_gamma_beta_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_bwd_gamma_beta/test_layernorm2d_bwd_gamma_beta_fp32.cpp b/test/normalization_bwd_gamma_beta/test_layernorm2d_bwd_gamma_beta_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/CMakeLists.txt b/test/normalization_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_groupnorm_fwd_fp16.cpp b/test/normalization_fwd/test_groupnorm_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_groupnorm_fwd_fp32.cpp b/test/normalization_fwd/test_groupnorm_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm2d_fwd_fp16.cpp b/test/normalization_fwd/test_layernorm2d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm2d_fwd_fp32.cpp b/test/normalization_fwd/test_layernorm2d_fwd_fp32.cpp old mode 100644 new mode 100755 diff --git a/test/normalization_fwd/test_layernorm4d_fwd_fp16.cpp b/test/normalization_fwd/test_layernorm4d_fwd_fp16.cpp old mode 100644 new mode 100755 diff --git a/test/permute_scale/CMakeLists.txt b/test/permute_scale/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/permute_scale/test_permute_scale.cpp b/test/permute_scale/test_permute_scale.cpp old mode 100644 new mode 100755 diff --git a/test/pool/CMakeLists.txt b/test/pool/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/pool/test_avg_pool3d_bwd.cpp b/test/pool/test_avg_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_avg_pool3d_fwd.cpp b/test/pool/test_avg_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_max_pool3d_bwd.cpp b/test/pool/test_max_pool3d_bwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_max_pool3d_fwd.cpp b/test/pool/test_max_pool3d_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/pool/test_pool_fwd_common.hpp b/test/pool/test_pool_fwd_common.hpp old mode 100644 new mode 100755 diff --git a/test/reduce/CMakeLists.txt b/test/reduce/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/reduce/reduce_no_index.cpp b/test/reduce/reduce_no_index.cpp old mode 100644 new mode 100755 diff --git a/test/reduce/reduce_with_index.cpp b/test/reduce/reduce_with_index.cpp old mode 100644 new mode 100755 diff --git a/test/reference_conv_fwd/CMakeLists.txt b/test/reference_conv_fwd/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/reference_conv_fwd/reference_conv_fwd.cpp b/test/reference_conv_fwd/reference_conv_fwd.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/CMakeLists.txt b/test/softmax/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_interface.cpp b/test/softmax/test_softmax_interface.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_rank3.cpp b/test/softmax/test_softmax_rank3.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_rank4.cpp b/test/softmax/test_softmax_rank4.cpp old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_ut_cases.inc b/test/softmax/test_softmax_ut_cases.inc old mode 100644 new mode 100755 diff --git a/test/softmax/test_softmax_util.hpp b/test/softmax/test_softmax_util.hpp old mode 100644 new mode 100755 diff --git a/test/space_filling_curve/CMakeLists.txt b/test/space_filling_curve/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/space_filling_curve/space_filling_curve.cpp b/test/space_filling_curve/space_filling_curve.cpp old mode 100644 new mode 100755 diff --git a/test/transpose/CMakeLists.txt b/test/transpose/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/transpose/test_transpose.cpp b/test/transpose/test_transpose.cpp old mode 100644 new mode 100755 diff --git a/test/wmma_op/CMakeLists.txt b/test/wmma_op/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/wmma_op/wmma_op.cpp b/test/wmma_op/wmma_op.cpp old mode 100644 new mode 100755 diff --git a/test/wmma_op/wmma_op_util.hpp b/test/wmma_op/wmma_op_util.hpp old mode 100644 new mode 100755 diff --git a/test/wrapper/CMakeLists.txt b/test/wrapper/CMakeLists.txt old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_copy.cpp b/test/wrapper/test_wrapper_copy.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_gemm.cpp b/test/wrapper/test_wrapper_gemm.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_layout.cpp b/test/wrapper/test_wrapper_layout.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_partition.cpp b/test/wrapper/test_wrapper_partition.cpp old mode 100644 new mode 100755 diff --git a/test/wrapper/test_wrapper_tensor.cpp b/test/wrapper/test_wrapper_tensor.cpp old mode 100644 new mode 100755