mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 19:31:48 +00:00
* Much faster iq2_xxs GEMM PP-512 = 290 t/s vs ~110 t/s (iq2_xxs) or 148 t/s (iq2_xxs_r4) on main. * iq2_xxs: q8_2_x4 GEMM * iq2_xxs: use template for q8_2_x4 GEMM * Fix AVX2 * Cleanup * NEON is not working yet, so still use Q8_K GEMM --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
14 lines
313 B
C++
14 lines
313 B
C++
#pragma once
|
|
|
|
#include "iqk_common.h"
|
|
|
|
#ifdef IQK_IMPLEMENT
|
|
|
|
#include <array>
|
|
|
|
bool iqk_set_kernels_iquants(int ne00, int typeA, int typeB, std::array<mul_mat_t, IQK_MAX_NY>& kernels, mul_mat_t& func16);
|
|
|
|
bool iqk_convert_iquants_q80_r8(int type, int n, const void * vx, size_t bx, void * vy, int nrc_x);
|
|
|
|
#endif
|