Building the src folder now takes 6 seconds

This commit is contained in:
Iwan Kawrakow
2025-10-11 10:13:07 +03:00
parent ca73a21a0e
commit 4b71c16a75
6 changed files with 2460 additions and 2340 deletions

View File

@@ -19,6 +19,7 @@ add_library(llama
llama-sampling.cpp
llama-mmap.cpp
llama-model-loader.cpp
llama-load-tensors.cpp
llama-build-context.h
llama-build-context.cpp
llama-model.h

View File

@@ -4,6 +4,7 @@
#include <cstdint>
#include <array>
#include <cmath>
#define LLAMA_MAX_LAYERS 512

2318
src/llama-load-tensors.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,7 @@
#include "llama-model-loader.h"
#include "llama-impl.h"
#include "llama-mmap.h"
#include "llama-model.h"
#include "ggml.h"
//#include "ggml-backend.h"
@@ -20,6 +21,7 @@
#include <map>
#include <array>
#include <future>
#include <regex>
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
@@ -1080,3 +1082,4 @@ template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
template std::enable_if<std::is_integral<unsigned int>::value, bool>::type llama_model_loader::get_arr_n<unsigned int>(enum llm_kv, unsigned int&, bool);

View File

@@ -10,6 +10,7 @@
#include <stdexcept>
#include <unordered_map>
#include <vector>
#include <map>
enum llama_fver {
GGUF_FILE_VERSION_V1 = 1,
@@ -29,6 +30,8 @@ static const char * llama_file_version_name(llama_fver version) {
using llama_buf_map = std::unordered_map<uint32_t, ggml_backend_buffer_t>;
struct llama_layer;
struct llama_model_loader {
int n_kv = 0;
int n_tensors = 0;
@@ -171,3 +174,127 @@ struct llama_model_loader {
void llm_load_arch(llama_model_loader & ml, llama_model & model);
void llm_load_hparams(llama_model_loader & ml, llama_model & model);
struct LLM_TN;
// Helper that factors per-architecture tensor loading out of llama_model_loader.
// One load_*_tensors() member exists per supported model architecture; each one
// creates/registers that architecture's weight tensors via create_tensor() using
// the naming scheme carried by LLM_TN. Definitions live in llama-load-tensors.cpp.
//
// NOTE(review): "load_lensor_helper" looks like a typo for "load_tensor_helper".
// Renaming requires touching the (out-of-view) definitions and all call sites,
// so it is only flagged here, not fixed.
struct load_lensor_helper {
// Binds the helper to a loader (tensor source) and the model being populated.
load_lensor_helper(llama_model_loader & ml, llama_model & model);
// --- Per-architecture loaders. Each returns true on success (presumably; the
// --- definitions are not visible here — TODO confirm the failure contract).
bool load_llama_tensors(const LLM_TN & tn);
bool load_deci_tensors(const LLM_TN & tn);
bool load_llama4_tensors(const LLM_TN & tn);
bool load_grok_tensors(const LLM_TN & tn);
bool load_dbrx_tensors(const LLM_TN & tn);
// with_ffn_norm=false skips the FFN norm tensors for baichuan-like variants
// that lack them (assumption from the parameter name — verify in definition).
bool load_baichuan_tensors(const LLM_TN & tn, bool with_ffn_norm = true);
bool load_falcon_tensors(const LLM_TN & tn);
bool load_starcoder_tensors(const LLM_TN & tn);
bool load_bert_tensors(const LLM_TN & tn);
bool load_jina_bert2_tensors(const LLM_TN & tn);
bool load_bloom_tensors(const LLM_TN & tn);
bool load_mpt_tensors(const LLM_TN & tn);
bool load_stablelm_tensors(const LLM_TN & tn);
bool load_qwen_tensors(const LLM_TN & tn);
bool load_qwen2_tensors(const LLM_TN & tn);
bool load_qwen2_moe_tensors(const LLM_TN & tn);
bool load_qwen3_tensors(const LLM_TN & tn);
bool load_qwen3_moe_tensors(const LLM_TN & tn);
bool load_phi2_tensors(const LLM_TN & tn);
bool load_phi3_tensors(const LLM_TN & tn);
bool load_gpt2_tensors(const LLM_TN & tn);
bool load_codeshell_tensors(const LLM_TN & tn);
bool load_orion_tensors(const LLM_TN & tn);
bool load_internlm_tensors(const LLM_TN & tn);
// version distinguishes Gemma generations sharing one loader (presumably 1/2/3
// — confirm against the definition's switch).
bool load_gemma_tensors(const LLM_TN & tn, int version);
bool load_starcoder2_tensors(const LLM_TN & tn);
bool load_mamba_tensors(const LLM_TN & tn);
bool load_xverse_tensors(const LLM_TN & tn);
bool load_command_r_tensors(const LLM_TN & tn);
bool load_olmo_tensors(const LLM_TN & tn);
bool load_openelm_tensors(const LLM_TN & tn);
bool load_gptneox_tensors(const LLM_TN & tn);
// NOTE(review): "arctix" is likely a typo for "arctic" (Snowflake Arctic);
// rename together with the definition if confirmed.
bool load_arctix_tensors(const LLM_TN & tn);
bool load_deepseek2_tensors(const LLM_TN & tn);
bool load_glm4_tensors(const LLM_TN & tn);
bool load_glm4_moe_tensors(const LLM_TN & tn);
bool load_bitnet_tensors(const LLM_TN & tn);
bool load_bitnet2_tensors(const LLM_TN & tn);
bool load_t5_tensors(const LLM_TN & tn);
bool load_tsencoder_tensors(const LLM_TN & tn);
bool load_jais_tensors(const LLM_TN & tn);
bool load_chatglm_tensors(const LLM_TN & tn);
bool load_cohere2_tensors(const LLM_TN & tn);
bool load_dots1_tensors(const LLM_TN & tn);
bool load_ernie45_tensors(const LLM_TN & tn);
bool load_hunyuan_tensors(const LLM_TN & tn);
bool load_openai_moe_tensors(const LLM_TN & tn);
// Entry point: dispatches to the architecture-specific loader above
// (presumably keyed on model.arch — confirm in llama-load-tensors.cpp).
bool load_tensors();
// Non-owning references; the loader and model must outlive this helper.
llama_model_loader & ml;
llama_model & model;
// Creates a tensor named `name` with dimensions `ne` in context `ctx`;
// `flags` semantics are defined at the (out-of-view) implementation.
ggml_tensor * create_tensor(ggml_context * ctx, const std::string & name, const std::vector<int64_t> & ne, int flags = 0);
// Shared routines for the token-embedding / output head tensors common to
// most architectures; `norm_bias` / `has_norm` toggle the norm tensors.
void load_default_embd_output(const LLM_TN & tn, int n_embd, int n_vocab, bool norm_bias);
void load_embd_output(const LLM_TN & tn, int n_embd, int n_vocab, bool has_norm = true);
// Standard attention / FFN tensor sets for one layer `i`; ctx_split is the
// context used for row-split (multi-GPU) tensors — TODO confirm.
void load_std_attn(int i, const LLM_TN & tn, llama_layer & layer, int n_embd, int n_embd_gqa, ggml_context * ctx_split);
void load_std_ffn(int i, const LLM_TN & tn, llama_layer & layer, int n_ff, int n_embd, ggml_context * ctx_split);
// Map layer index -> ggml context (plain vs. split placement).
inline ggml_context * ctx_for_layer(int i) const;
inline ggml_context * ctx_for_layer_split(int i) const;
// How many layers land in each backend buffer type, and the ggml context
// created for each buffer type.
std::map<ggml_backend_buffer_type_t, int> buft_layer_count;
std::map<ggml_backend_buffer_type_t, ggml_context *> ctx_map;
// Size reserved per ggml context (uninitialized here; set during setup).
size_t ctx_size;
// Dedicated contexts for input-side, output-side, and split output tensors.
ggml_context * ctx_input;
ggml_context * ctx_output;
ggml_context * ctx_output_split;
};

File diff suppressed because it is too large Load Diff