mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 22:54:10 +00:00
Building the src folder now takes 6 seconds
This commit is contained in:
@@ -19,6 +19,7 @@ add_library(llama
|
||||
llama-sampling.cpp
|
||||
llama-mmap.cpp
|
||||
llama-model-loader.cpp
|
||||
llama-load-tensors.cpp
|
||||
llama-build-context.h
|
||||
llama-build-context.cpp
|
||||
llama-model.h
|
||||
|
||||
@@ -4,6 +4,7 @@
|
||||
|
||||
#include <cstdint>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
|
||||
#define LLAMA_MAX_LAYERS 512
|
||||
|
||||
|
||||
2318
src/llama-load-tensors.cpp
Normal file
2318
src/llama-load-tensors.cpp
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,7 @@
|
||||
#include "llama-model-loader.h"
|
||||
#include "llama-impl.h"
|
||||
#include "llama-mmap.h"
|
||||
#include "llama-model.h"
|
||||
#include "ggml.h"
|
||||
//#include "ggml-backend.h"
|
||||
|
||||
@@ -20,6 +21,7 @@
|
||||
#include <map>
|
||||
#include <array>
|
||||
#include <future>
|
||||
#include <regex>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
@@ -1080,3 +1082,4 @@ template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv
|
||||
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
|
||||
|
||||
template std::enable_if<std::is_integral<unsigned int>::value, bool>::type llama_model_loader::get_arr_n<unsigned int>(enum llm_kv, unsigned int&, bool);
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@
|
||||
#include <stdexcept>
|
||||
#include <unordered_map>
|
||||
#include <vector>
|
||||
#include <map>
|
||||
|
||||
enum llama_fver {
|
||||
GGUF_FILE_VERSION_V1 = 1,
|
||||
@@ -29,6 +30,8 @@ static const char * llama_file_version_name(llama_fver version) {
|
||||
|
||||
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
|
||||
|
||||
struct llama_layer;
|
||||
|
||||
struct llama_model_loader {
|
||||
int n_kv = 0;
|
||||
int n_tensors = 0;
|
||||
@@ -171,3 +174,127 @@ struct llama_model_loader {
|
||||
// Reads the architecture identifier from the model's GGUF metadata and stores it on the model.
void llm_load_arch(llama_model_loader & ml, llama_model & model);

// Reads the model hyper-parameters (layer counts, dimensions, etc.) from GGUF metadata into the model.
void llm_load_hparams(llama_model_loader & ml, llama_model & model);

// Forward declaration: tensor-name builder used to form per-layer tensor lookup names.
struct LLM_TN;
|
||||
|
||||
// Helper that creates/loads all weight tensors for a model, dispatching to a
// per-architecture load_*_tensors() routine.  Declaration only — the
// implementations live in src/llama-load-tensors.cpp (added in this commit).
// NOTE(review): "lensor" looks like a typo for "tensor"; kept as-is because the
// definition and all call sites are in other files — confirm before renaming.
struct load_lensor_helper {
    load_lensor_helper(llama_model_loader & ml, llama_model & model);

    // One loader per supported architecture.  Each returns true on success.
    // NOTE(review): presumably return false on an unsupported/failed layout —
    // verify against the definitions in llama-load-tensors.cpp.
    bool load_llama_tensors(const LLM_TN & tn);
    bool load_deci_tensors(const LLM_TN & tn);
    bool load_llama4_tensors(const LLM_TN & tn);
    bool load_grok_tensors(const LLM_TN & tn);
    bool load_dbrx_tensors(const LLM_TN & tn);
    // with_ffn_norm: baichuan variants differ in whether an FFN norm tensor exists.
    bool load_baichuan_tensors(const LLM_TN & tn, bool with_ffn_norm = true);
    bool load_falcon_tensors(const LLM_TN & tn);
    bool load_starcoder_tensors(const LLM_TN & tn);
    bool load_bert_tensors(const LLM_TN & tn);
    bool load_jina_bert2_tensors(const LLM_TN & tn);
    bool load_bloom_tensors(const LLM_TN & tn);
    bool load_mpt_tensors(const LLM_TN & tn);
    bool load_stablelm_tensors(const LLM_TN & tn);
    bool load_qwen_tensors(const LLM_TN & tn);
    bool load_qwen2_tensors(const LLM_TN & tn);
    bool load_qwen2_moe_tensors(const LLM_TN & tn);
    bool load_qwen3_tensors(const LLM_TN & tn);
    bool load_qwen3_moe_tensors(const LLM_TN & tn);
    bool load_phi2_tensors(const LLM_TN & tn);
    bool load_phi3_tensors(const LLM_TN & tn);
    bool load_gpt2_tensors(const LLM_TN & tn);
    bool load_codeshell_tensors(const LLM_TN & tn);
    bool load_orion_tensors(const LLM_TN & tn);
    bool load_internlm_tensors(const LLM_TN & tn);
    // version: shared loader for gemma/gemma2/gemma3-style layouts.
    bool load_gemma_tensors(const LLM_TN & tn, int version);
    bool load_starcoder2_tensors(const LLM_TN & tn);
    bool load_mamba_tensors(const LLM_TN & tn);
    bool load_xverse_tensors(const LLM_TN & tn);
    bool load_command_r_tensors(const LLM_TN & tn);
    bool load_olmo_tensors(const LLM_TN & tn);
    bool load_openelm_tensors(const LLM_TN & tn);
    bool load_gptneox_tensors(const LLM_TN & tn);
    // NOTE(review): "arctix" is likely a typo for "arctic" (Snowflake Arctic) —
    // kept, since the definition is in llama-load-tensors.cpp.
    bool load_arctix_tensors(const LLM_TN & tn);
    bool load_deepseek2_tensors(const LLM_TN & tn);
    bool load_glm4_tensors(const LLM_TN & tn);
    bool load_glm4_moe_tensors(const LLM_TN & tn);
    bool load_bitnet_tensors(const LLM_TN & tn);
    bool load_bitnet2_tensors(const LLM_TN & tn);
    bool load_t5_tensors(const LLM_TN & tn);
    bool load_tsencoder_tensors(const LLM_TN & tn);
    bool load_jais_tensors(const LLM_TN & tn);
    bool load_chatglm_tensors(const LLM_TN & tn);
    bool load_cohere2_tensors(const LLM_TN & tn);
    bool load_dots1_tensors(const LLM_TN & tn);
    bool load_ernie45_tensors(const LLM_TN & tn);
    bool load_hunyuan_tensors(const LLM_TN & tn);
    bool load_openai_moe_tensors(const LLM_TN & tn);

    // Entry point: selects the architecture-specific loader above.
    bool load_tensors();

    // Non-owning references to the loader and the model being populated;
    // must outlive this helper.
    llama_model_loader & ml;
    llama_model & model;

    // Creates (or maps) a single named tensor with the given dimensions in ctx.
    // flags: loader-specific creation flags (0 = default).
    ggml_tensor * create_tensor(ggml_context * ctx, const std::string & name, const std::vector<int64_t> & ne, int flags = 0);

    // Load token-embedding and output tensors.  norm_bias / has_norm control
    // whether an output-norm bias / norm tensor is expected for the architecture.
    void load_default_embd_output(const LLM_TN & tn, int n_embd, int n_vocab, bool norm_bias);
    void load_embd_output(const LLM_TN & tn, int n_embd, int n_vocab, bool has_norm = true);

    // Load the standard attention / feed-forward tensors for layer i.
    // ctx_split: context used for tensors that are split across backends.
    void load_std_attn(int i, const LLM_TN & tn, llama_layer & layer, int n_embd, int n_embd_gqa, ggml_context * ctx_split);
    void load_std_ffn(int i, const LLM_TN & tn, llama_layer & layer, int n_ff, int n_embd, ggml_context * ctx_split);

    // Map layer index i to the ggml context its tensors should be created in
    // (regular vs. split placement).
    inline ggml_context * ctx_for_layer(int i) const;
    inline ggml_context * ctx_for_layer_split(int i) const;

    // How many layers are assigned to each backend buffer type.
    std::map<ggml_backend_buffer_type_t, int> buft_layer_count;
    // One ggml context per backend buffer type.
    std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
    // Total ggml context size needed for the tensor metadata.
    // NOTE(review): uninitialized here — presumably set in the constructor; confirm.
    size_t ctx_size;

    // Dedicated contexts for input, output, and split-output tensors.
    ggml_context * ctx_input;
    ggml_context * ctx_output;
    ggml_context * ctx_output_split;
};
|
||||
|
||||
2350
src/llama.cpp
2350
src/llama.cpp
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user