mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 14:44:09 +00:00
Tool calls support from mainline (#723)
* Tool calls support from mainline
* update cmake
* revert api for /completions
* Fix broken thinking process for gpt-oss
* add missing args and fix webui bugs
* add missing args and fix webui bugs2
* Fix reasoning format error
* add usage
* change default post_sampling_probs to true
* add back generated_text
* Remove server endpoints tests
* add log
* Chat fixes
* Remove logs
* webui: revert extra handling of thinking process

---------

Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -3,7 +3,7 @@
|
||||
#include "common.h"
|
||||
#include "sampling.h"
|
||||
#include "llama-impl.h"
|
||||
|
||||
#include "llama-vocab.h"
|
||||
#include <cstring>
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
@@ -302,7 +302,7 @@ std::vector<llama_token> llama_speculative_gen_draft(
|
||||
|
||||
llama_decode(ctx_dft, batch);
|
||||
|
||||
llama_sampling_reset(smpl);
|
||||
llama_sampling_reset(llama_get_vocab(ctx_dft), smpl);
|
||||
|
||||
// sample n_draft tokens from the draft model
|
||||
for (int i = 0; i < params.n_draft; ++i) {
|
||||
|
||||
Reference in New Issue
Block a user