diff --git a/common/chat.cpp b/common/chat.cpp index 21aa524e..49626ff4 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -1279,8 +1279,8 @@ static common_chat_params common_chat_params_init_kimi_k2(const common_chat_temp form.tool_start = "<|tool_call_begin|>"; form.tool_sep = "<|tool_call_argument_begin|>{"; form.key_start = "\""; - form.key_val_sep = "\":"; - form.val_end = ","; + form.key_val_sep = "\": "; + form.val_end = ", "; form.tool_end = "}<|tool_call_end|>"; form.scope_end = "<|tool_calls_section_end|>"; form.raw_argval = false; diff --git a/common/sampling.cpp b/common/sampling.cpp index 0e19de4c..769fc06c 100644 --- a/common/sampling.cpp +++ b/common/sampling.cpp @@ -429,6 +429,12 @@ static llama_token llama_sampling_sample_impl( GGML_ASSERT(!original_logits.empty()); } llama_token id = 0; + // Sample grammar first for resampling + if (ctx_sampling->grammar != NULL && is_resampling) { + float* logits = llama_get_logits_ith(ctx_main, idx); + // Apply grammar constraints to all candidates + llama_grammar_sample(ctx_sampling->grammar, ctx_main, &cur_p); + } if (temp < 0.0) { // greedy sampling, with probs