mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-14 15:57:37 +00:00
Refactor file llama.cpp (#823)
* llama_model and llama_hparams * llama_build_context Surprisingly small reduction in llama.cpp compile time given the reduction in LOCs (22k -> 14k) * LLM_TN llama.cpp compilation: 50 s -> 33 s * llama_quantize * arch names * All graph building is now in llm-build-context.cpp * hparams loading llama.cpp is now just 9300 LOC, but still takes 32 seconds to compile. * We are now at 6 seconds to build the src folder * load -> create We are not actually loading the tensors, but just creating them. --------- Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
#include "llama-model-loader.h"
|
||||
#include "llama-impl.h"
|
||||
#include "llama-mmap.h"
|
||||
#include "llama-model.h"
|
||||
#include "ggml.h"
|
||||
//#include "ggml-backend.h"
|
||||
|
||||
@@ -20,6 +21,7 @@
|
||||
#include <map>
|
||||
#include <array>
|
||||
#include <future>
|
||||
#include <regex>
|
||||
|
||||
#if defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
@@ -1080,3 +1082,4 @@ template bool llama_model_loader::get_key_or_arr<std::array<int, 4>>(enum llm_kv
|
||||
template bool llama_model_loader::get_key_or_arr<std::array<uint32_t, 512>>(enum llm_kv kid, std::array<uint32_t, 512> & result, uint32_t n, bool required);
|
||||
|
||||
template std::enable_if<std::is_integral<unsigned int>::value, bool>::type llama_model_loader::get_arr_n<unsigned int>(enum llm_kv, unsigned int&, bool);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user