mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-26 09:29:27 +00:00
* Add mtmd: the beginning * Add mtmd: mtmd.cpp compiles * Add mtmd: clip initialization compiles * Add mtmd: clip.cpp compiles * Add mtmd: builds successfully * Add CPU implementation for GGML_OP_GLU * Add CUDA implementation for GGML_OP_GLU * Add CPU implementation for GGML_OP_CONV_2D and GGML_OP_CONV_2D_DW * Add CUDA implementation for GGML_OP_CONV_2D and GGML_OP_CONV_2D_DW * Add mtmd: refresh CPU rope * Add mtmd: refresh CUDA rope * Add mtmd: add Qwen2-VL * Add mtmd: Qwen2.5-VL text seems to work with this change * Add mtmd: fix swiglu * Add mtmd: use LOG_TEE so generated tokens show up in terminal * Add mtmd: do not attempt to load a GPU backend if none are available * GLU, not GPU * Fix typo * Fix new/free mismatch * LOG stuff * Add mtmd: this fixes gibberish on second image --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
40 lines
1.1 KiB
Plaintext
40 lines
1.1 KiB
Plaintext
//
|
|
// Copyright (C) 2023-2024 The ggml authors
|
|
// Copyright (C) 2024 Iwan Kawrakow
|
|
// MIT license
|
|
// SPDX-License-Identifier: MIT
|
|
//
|
|
|
|
#include "common.cuh"
|
|
|
|
// Compile-time constant 256. Its users are not visible in this header;
// NOTE(review): presumably the threads-per-block used when launching the
// dequantize/convert kernels - confirm against the kernel launch sites.
#define CUDA_DEQUANTIZE_BLOCK_SIZE 256
|
|
|
|
// Function-pointer type shared by the conversion routines handed out by the
// ggml_get_to_*_cuda() lookups declared in this header.
//
//   x          type-erased, read-only input buffer
//   y          buffer of element type T (non-const, so presumably the output)
//   nrows      NOTE(review): presumably the number of rows to process - confirm
//   n_per_row  NOTE(review): presumably the number of elements per row - confirm
//   stream     CUDA stream passed through to the routine
template<typename T>
using to_t_cuda_t = void (*)(
        const void * __restrict__ x,
        T          * __restrict__ y,
        int64_t                   nrows,
        int64_t                   n_per_row,
        cudaStream_t              stream);

// Concrete instantiations of to_t_cuda_t for each supported destination element type.
using to_fp32_cuda_t = to_t_cuda_t<float>;
using to_fp16_cuda_t = to_t_cuda_t<half>;
using to_bf16_cuda_t = to_t_cuda_t<nv_bfloat16>;
|
|
|
|
// Returns a to_fp16_cuda_t function pointer selected by `type`; the returned
// routine writes `half` elements into its `y` argument.
// NOTE(review): behavior for unsupported types (e.g. returning nullptr) is
// defined in the implementation file, not visible here - confirm before relying on it.
to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type);
|
|
|
|
// Returns a to_fp32_cuda_t function pointer selected by `type`; the returned
// routine writes `float` elements into its `y` argument.
// NOTE(review): behavior for unsupported types (e.g. returning nullptr) is
// defined in the implementation file, not visible here - confirm before relying on it.
to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type);
|
|
|
|
// Returns a to_bf16_cuda_t function pointer selected by `type`; the returned
// routine writes `nv_bfloat16` elements into its `y` argument.
// NOTE(review): behavior for unsupported types (e.g. returning nullptr) is
// defined in the implementation file, not visible here - confirm before relying on it.
to_bf16_cuda_t ggml_get_to_bf16_cuda(ggml_type type);
|
|
|
|
// Scalar conversion helper callable from both host and device code.
//
// The conversion path is chosen at compile time from the (dst_t, src_t) pair;
// the first matching rule wins:
//   1. dst_t == src_t        -> the value is returned unchanged
//   2. dst_t is nv_bfloat16  -> x is converted to float, then __float2bfloat16
//   3. src_t is nv_bfloat16  -> __bfloat162float(x), implicitly converted to dst_t
//   4. dst_t is int32_t      -> int32_t(x)
//   5. anything else         -> float(x), implicitly converted to dst_t
template<typename dst_t, typename src_t>
__host__ __device__ inline dst_t ggml_cuda_cast(src_t x) {
    // Compile-time predicates describing the requested conversion.
    constexpr bool same_type = std::is_same_v<dst_t, src_t>;
    constexpr bool to_bf16   = std::is_same_v<dst_t, nv_bfloat16>;
    constexpr bool from_bf16 = std::is_same_v<src_t, nv_bfloat16>;
    constexpr bool to_int32  = std::is_same_v<dst_t, int32_t>;

    if constexpr (same_type) {
        return x;
    } else if constexpr (to_bf16) {
        // bf16 is produced via float, whatever the source type is.
        const float as_float = float(x);
        return __float2bfloat16(as_float);
    } else if constexpr (from_bf16) {
        return __bfloat162float(x);
    } else if constexpr (to_int32) {
        return int32_t(x);
    } else {
        // Generic fallback: go through float and let the return convert to dst_t.
        return float(x);
    }
}
|
|
|