Ext: Fix CUDA type cast

The __half_as_ushort function isn't available in CUDA versions earlier than 12.4, so read the half's raw 16 bits through a uint16_t pointer cast instead.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
This commit is contained in:
kingbri
2025-05-12 12:36:40 -04:00
parent 9d621509ab
commit 0a7733110e

View File

@@ -416,7 +416,7 @@ void QMoEMLP::forward_
for (int i = 0; i < num_experts; i++)
{
uint16_t w = __half_as_ushort(h_logits[i]);
uint16_t w = *reinterpret_cast<const uint16_t*>(&h_logits[i]);
if (!w) continue;
gemm_half_q_half_cuda(stream, cublas_handle, temp_state, w1[i], temp_a, rows, intermediate_size, columns, true, temp_dq, true, temp_logits + i, num_experts, false);