diff --git a/src/llama.cpp b/src/llama.cpp
index 46e551ff..dd28d39d 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -6177,7 +6177,12 @@ static void llm_load_vocab(
         }
 
         // default special tokens
-        vocab.special_bos_id = 11;
+        if(model.arch == LLM_ARCH_DOTS1) {
+            vocab.special_bos_id = -1;
+        }
+        else {
+            vocab.special_bos_id = 11;
+        }
         vocab.special_eos_id = 11;
         vocab.special_unk_id = -1;
         vocab.special_sep_id = -1;
@@ -17009,7 +17014,7 @@ struct llm_build_context {
                 Qcur = ggml_reshape_3d(ctx0, Qcur, n_embd_head, n_head,    n_tokens);
                 Kcur = ggml_reshape_3d(ctx0, Kcur, n_embd_head, n_head_kv, n_tokens);
-                Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
+                //Vcur = ggml_reshape_3d(ctx0, Vcur, n_embd_head, n_head_kv, n_tokens);
 
                 Qcur = llm_build_norm(ctx0, Qcur, hparams, model.layers[il].attn_q_norm, NULL, LLM_NORM_RMS, cb, il);
                 cb(Qcur, "Qcur_normed", il);
 
@@ -17031,7 +17036,7 @@ struct llm_build_context {
 
                 cb(Qcur, "Qcur", il);
                 cb(Kcur, "Kcur", il);
-                cb(Vcur, "Vcur", il);
+                //cb(Vcur, "Vcur", il);
 
                 cur = llm_build_kv(ctx0, lctx, kv_self, gf,
                         model.layers[il].wo, model.layers[il].bo,
@@ -17217,7 +17222,7 @@ static struct ggml_cgraph * llama_build_graph(
     const llama_vocab * vocab = llama_get_vocab(&lctx);
     llama_token bos = llama_token_bos_impl(*vocab);
     llama_token eos = llama_token_eos_impl(*vocab);
-    bool is_warming_up = (batch.n_tokens == 1 && batch.token[0] == bos);
+    bool is_warming_up = (batch.n_tokens == 1 && (batch.token[0] == ((bos != -1) ? bos : eos)));
 
     struct llm_build_context llm(lctx, batch, cb, worst_case, is_warming_up);
     llm.init();
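
For readers skimming the patch, a minimal standalone sketch of the warm-up check the last hunk introduces; the helper function, its name, and its parameters are illustrative only, not part of llama.cpp:

    #include <cstdint>

    using llama_token = int32_t;

    // Illustrative helper (not part of the patch): a warm-up run is a
    // single-token batch holding BOS, or EOS when the model defines no
    // BOS token. dots1 sets special_bos_id = -1, so the previous check
    // (token[0] == bos) could never match during warm-up.
    static bool is_warming_up(int32_t n_tokens, const llama_token * tokens,
                              llama_token bos, llama_token eos) {
        const llama_token probe = (bos != -1) ? bos : eos;
        return n_tokens == 1 && tokens[0] == probe;
    }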