mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-05-01 03:41:53 +00:00
12 lines
502 B
Plaintext
12 lines
502 B
Plaintext
#pragma once
|
|
|
|
#include "common.cuh"
|
|
|
|
// Declaration of ggml_cuda_mul_mat_q_id; the definition is not in this header.
//
// Parameters, as far as the signature shows:
//   ctx                  - CUDA backend context, passed by non-const reference.
//   src0, src1, ids      - input tensors, taken as pointers to const.
//   dst                  - tensor taken as a non-const pointer (presumably the output; confirm against the definition).
//   ids_data             - non-const raw byte pointer.
//   src1_quantized_data  - non-const raw byte pointer.
//
// NOTE(review): the name suggests a quantized matrix multiplication where rows are routed by `ids`
// (e.g. expert selection) - confirm against the definition.
// NOTE(review): size, memory space (host vs. device), ownership and nullability of `ids_data` and
// `src1_quantized_data` cannot be determined from this declaration - confirm with the caller.
void ggml_cuda_mul_mat_q_id(
|
|
ggml_backend_cuda_context & ctx, const ggml_tensor * src0, const ggml_tensor * src1, const ggml_tensor * ids,
|
|
ggml_tensor * dst, char * ids_data, char * src1_quantized_data);
|
|
|
|
// Declaration of compute_row_ids; the definition is not in this header.
//
// Parameters, as far as the signature shows:
//   ids            - read-only int32_t buffer.
//   ids_src1       - writable int32_t buffer.
//   ids_dst        - writable int32_t buffer.
//   expert_bounds  - writable int32_t buffer.
//   ne02, ne12, n_expert_used, ne11, nb11, nb12, nb21 - 64-bit integer arguments.
//   stream         - CUDA stream handle.
//
// NOTE(review): presumably `ne*` are element counts and `nb*` are byte strides of the source tensors,
// following the naming used elsewhere in the project - confirm against the definition.
// NOTE(review): presumably the three writable buffers are outputs filled from `ids`, and the work is
// enqueued on `stream` without synchronizing - confirm; required buffer sizes are not visible here.
void compute_row_ids(const int32_t * ids, int32_t * ids_src1, int32_t * ids_dst, int32_t * expert_bounds,
|
|
int64_t ne02, int64_t ne12, int64_t n_expert_used, int64_t ne11, int64_t nb11, int64_t nb12, int64_t nb21, cudaStream_t stream);
|
|
|