mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
* Add mtmd: the beginning * Add mtmd: mtmd.cpp compiles * Add mtmd: clip initialization compiles * Add mtmd: clip.cpp compiles * Add mtmd: builds successfully * Add CPU implementation for GGML_OP_GLU * Add CUDA implementation for GGML_OP_GLU * Add CPU implementation for GGML_OP_CONV_2D and GGML_OP_CONV_2D_DW * Add CUDA implementation for GGML_OP_CONV_2D and GGML_OP_CONV_2D_DW * Add mtmd: refresh CPU rope * Add mtmd: refresh CUDA rope * Add mtmd: add Qwen2-VL * Add mtmd: Qwen2.5-VL text seems to work with this change * Add mtmd: fix swiglu * Add mtmd: use LOG_TEE so generated tokens show up in terminal * Add mtmd: do not attempt to load a GPU backend if none are available * GLU, not GPU * Fix typo * Fix new/free mismatch * LOG stuff * Add mtmd: this fixes gibberish on second image --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
48 lines
858 B
C++
48 lines
858 B
C++
#pragma once
|
|
|
|
#include "ggml.h"
|
|
|
|
#include <cstdint>
|
|
#include <vector>
|
|
#include <string>
|
|
|
|
// WHISPER_ASSERT is a plain alias for GGML_ASSERT (presumably provided by ggml.h, included above).
#define WHISPER_ASSERT GGML_ASSERT
|
|
|
|
// Audio sample rate constant. NOTE(review): presumably in Hz (i.e. 16 kHz) - confirm against the implementation.
#define WHISPER_SAMPLE_RATE 16000
|
|
// FFT size constant. NOTE(review): presumably the analysis window length in samples - confirm against the implementation.
#define WHISPER_N_FFT 400
|
|
// Hop length constant. NOTE(review): presumably the step between successive frames, in samples - confirm against the implementation.
#define WHISPER_HOP_LENGTH 160
|
|
// Chunk size constant. NOTE(review): presumably the length of one audio chunk in seconds - confirm against the implementation.
#define WHISPER_CHUNK_SIZE 30
|
|
|
|
// Has the same numeric value as WHISPER_SAMPLE_RATE.
// NOTE(review): presumably the sample rate (Hz) shared by callers outside the whisper path - confirm where it is used.
#define COMMON_SAMPLE_RATE 16000
|
|
|
|
// Types and entry point for audio preprocessing.
// NOTE(review): judging by the names, this turns raw float samples into mel
// spectrograms; the implementation is not visible in this header.
namespace whisper_preprocessor {

// One preprocessing result.
// All integer fields are zero-initialized so that a default-constructed value
// never carries indeterminate data; aggregate initialization still works.
struct whisper_mel {
    int n_len     = 0; // NOTE(review): presumably the number of frames held in `data` - confirm
    int n_len_org = 0; // NOTE(review): presumably the frame count before any padding - confirm
    int n_mel     = 0; // NOTE(review): presumably the number of mel bins - confirm

    std::vector<float> data; // spectrogram values; memory layout is not stated in this header
};

// A filter bank consumed by preprocess_audio().
// Integer fields are zero-initialized for the same reason as in whisper_mel.
struct whisper_filters {
    int32_t n_mel = 0; // NOTE(review): presumably the number of mel bins - confirm
    int32_t n_fft = 0; // NOTE(review): presumably the number of FFT bins per filter - confirm

    std::vector<float> data; // filter coefficients; memory layout is not stated in this header
};

// Preprocesses audio samples with the given filter bank.
//
//   samples   - pointer to the input samples (read-only)
//   n_samples - number of elements available at `samples`
//   filters   - filter bank to apply (read-only)
//   output    - receives the results (presumably one whisper_mel per chunk - confirm)
//
// NOTE(review): the meaning of the bool result (presumably true on success)
// and whether `output` is cleared or appended to are not visible here.
bool preprocess_audio(
        const float * samples,
        size_t n_samples,
        const whisper_filters & filters,
        std::vector<whisper_mel> & output);

} // namespace whisper_preprocessor
|
|
|
|
namespace whisper_precalc_filters {
|
|
|
|
whisper_preprocessor::whisper_filters get_128_bins();
|
|
|
|
} // namespace whisper_precalc_filters
|