mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-23 06:34:13 +00:00
* grammar : fix JSON Schema for string regex with top-level alt. (#9903) Prior to this commit, using a JSON Schema containing a string with `pattern` regular expression that uses top-level alternation (e.g. `"pattern": "^A|B|C|D$"`) would result in invalid JSON output from the constrained sampling grammar, because it ended up creating a grammar rule like this for the string: ``` thing ::= "\"" "A" | "B" | "C" | "D" "\"" space ``` Note that this rule will only match a starting quote for the "A" case, and will only match an ending quote for the "D" case, so this rule will always produce invalid JSON when used for sampling (that is, the JSON will always be lacking the starting quote, the ending quote, or both). This was fixed in a simple way by adding parentheses to the generated rule (for all string pattern rules, to keep it simple), such that the new generated rule looks like this (correct): ``` thing ::= "\"" ("A" | "B" | "C" | "D") "\"" space ``` * grammars : add English-only grammar (#10612) * grammar : handle maxItems == 0 in JSON schema (#13117) Co-authored-by: Richard Lyons <frob@cloudstaff.com> * grammar-parser : fix possible null-deref (#9004) Fixes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=70680 Signed-off-by: David Korczynski <david@adalogics.com> * llama : fix typo in llama-grammar.h [no ci] (#11816) * * server: fix "--grammar-file" parameter (#12285) * common : use std::string_view now that we target c++17 (#14319) * json : support `enum` values within `allOf` (#15830) * grammar : use int64_t to avoid int overflows in int schema to grammar conversion logic (#16626) * grammar : support array references in json schema (#16792) * grammar : support array references in json schema * Update json-schema-to-grammar.cpp Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * grammar : improve regex when naming ref derived rules * grammar : replace non-conformant definitions array with anyOf test case --------- Co-authored-by: Sigbjørn 
Skjæret <sigbjorn.skjaeret@scala.com> # Conflicts: # tests/test-json-schema-to-grammar.cpp * merge fix * llama : minor grammar refactor (#10897) * llama: fix error on bad grammar (#12628) * grammar : fix integer overflow (#17381) * Fix DoS / integer overflow * Remove optional, use INT64_MAX instead as placeholder value (it's technically -1, so it fits :) * White space * Actually, since it's unsigned, use UINT64_MAX # Conflicts: # src/llama-grammar.cpp * grammar: fix regression caused by #17381 (#17412) * grammar: fix regression caused by #17381 * more readable # Conflicts: # src/llama-grammar.cpp * Merge Fix * Fix warnings --------- Signed-off-by: David Korczynski <david@adalogics.com> Co-authored-by: Joe Eli McIlvain <joe.eli.mac@gmail.com> Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> Co-authored-by: frob <rick+github@frob.com.au> Co-authored-by: Richard Lyons <frob@cloudstaff.com> Co-authored-by: DavidKorczynski <david@adalogics.com> Co-authored-by: Daniel Bevenius <daniel.bevenius@gmail.com> Co-authored-by: firecoperana <firecoperana> Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> Co-authored-by: Aldehir Rojas <hello@alde.dev> Co-authored-by: Olivier Chafik <olivier.chafik@gmail.com> Co-authored-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com> Co-authored-by: Xuan-Son Nguyen <son@huggingface.co> Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
109 lines
3.4 KiB
C++
109 lines
3.4 KiB
C++
#pragma once

// Grammar-constrained sampling: GBNF parser state and the runtime grammar
// object used to restrict which tokens may be sampled.

#include "llama-impl.h"

#include <map>
#include <regex>
#include <string>
#include <vector>
|
|
|
|
// Forward declarations — full definitions live elsewhere in the project.
struct llama_vocab;
struct llama_sampling;
|
|
|
// Hand-written recursive-descent parser for GBNF grammar text.
// parse() fills `symbol_ids` and `rules`, which are then consumed by
// llama_grammar_init_impl to build the runtime grammar.
struct llama_grammar_parser {
    // symbol name -> rule id (index into `rules`)
    std::map<std::string, uint32_t> symbol_ids;

    // parsed rule table, indexed by rule id
    llama_grammar_rules rules;

    // C-style view of the parsed rules (raw element pointers), for the
    // pointer-array overload of llama_grammar_init_impl.
    llama_grammar_stack c_rules() const;

    // Returns the rule id for the symbol named by src[0..len)
    // (presumably allocating an id on first use — see llama-grammar.cpp).
    uint32_t get_symbol_id(const char* src, size_t len);

    // Mints a fresh symbol id with a name derived from `base_name`
    // (used for anonymous sub-rules such as groups and repetitions).
    uint32_t generate_symbol_id(const std::string& base_name);

    // Stores `rule` in the table under `rule_id`.
    void add_rule(uint32_t rule_id, const llama_grammar_rule& rule);

    // Parses a '|'-separated list of alternates belonging to `rule_name`.
    // Returns a pointer just past the consumed input.
    const char* parse_alternates(
            const char* src,
            const std::string& rule_name,
            uint32_t rule_id,
            bool is_nested);

    // Parses one sequence of grammar elements, appending them to `rule`.
    // Returns a pointer just past the consumed input.
    const char* parse_sequence(
            const char* src,
            const std::string& rule_name,
            llama_grammar_rule& rule,
            bool is_nested);

    // Parses a single `name ::= ...` definition; returns a pointer just
    // past the consumed input.
    const char* parse_rule(const char* src);

    // Parses a complete grammar from `src`; the bool result reports
    // success (false presumably means malformed input — verify in .cpp).
    bool parse(const char* src);

    // Dumps the parsed rules to `file` in human-readable form.
    void print(FILE* file);
};
|
|
|
|
// One lazy-grammar trigger: the regex source text plus its compiled form.
// The text is kept because std::regex cannot be converted back to a string.
struct llama_grammar_trigger_pattern {
    std::string pattern; // original regex source
    std::regex regex;    // compiled regex matched against generated output
};
|
|
|
|
// Runtime grammar state used to constrain token sampling.
struct llama_grammar {
    // note: allow null vocab for testing (not great)
    const llama_vocab* vocab;

    const llama_grammar_rules rules; // TODO: shared ptr

    // active parse stacks; updated as tokens are accepted
    llama_grammar_stacks stacks;

    // buffer for partially generated UTF-8 sequence from accepted tokens
    llama_partial_utf8 partial_utf8;

    // lazy grammars wait for trigger words or tokens before constraining the sampling.
    // we still have trigger_tokens for non-lazy grammars to force printing of special trigger tokens.
    // (useful e.g. for tool_choice=required)
    bool lazy = false;
    bool awaiting_trigger = false; // Initialized to true for lazy grammars only
    std::string trigger_buffer;    // Output buffered by lazy grammar. Will be cleared once trigger is found.
    std::vector<llama_token> trigger_tokens; // Tokens that trigger a lazy grammar, or tokens to force printing of (even if special).
    // Regular expressions that trigger a lazy grammar. Must be a full match of the entire generated
    // string, and the grammar will be given the string from the first match group onwards.
    std::vector<llama_grammar_trigger_pattern> trigger_patterns;
};
|
|
|
|
//
// internal API
//

// note: needed for tests (not great)
// Builds a grammar directly from pre-parsed rule element arrays
// (`n_rules` rules), starting evaluation at `start_rule_index`.
struct llama_grammar* llama_grammar_init_impl(
        const llama_grammar_element** rules,
        size_t n_rules,
        size_t start_rule_index);

// Parses `grammar_str` (GBNF text) and builds a grammar rooted at the rule
// named `grammar_root`. When `lazy` is true, the grammar waits for one of
// the `num_trigger_patterns` regexes or `num_trigger_tokens` tokens before
// constraining sampling. Presumably returns nullptr on parse failure —
// verify against llama-grammar.cpp.
struct llama_grammar* llama_grammar_init_impl(
        const struct llama_vocab* vocab,
        const char* grammar_str,
        const char* grammar_root,
        bool lazy,
        const char** trigger_patterns,
        size_t num_trigger_patterns,
        const llama_token* trigger_tokens,
        size_t num_trigger_tokens);

// Frees a grammar created by llama_grammar_init_impl (null-safe in the
// usual llama.cpp convention — confirm in .cpp).
void llama_grammar_free_impl(struct llama_grammar * grammar);

// Returns a copy of `grammar` (rules/stacks are held by value above).
struct llama_grammar * llama_grammar_copy_impl(const struct llama_grammar * grammar);

// Restricts `candidates` to tokens the grammar can currently accept
// (exact logit handling lives in llama-grammar.cpp).
void llama_grammar_sample_impl(
        const struct llama_grammar * grammar,
        const struct llama_vocab * vocab,
        const struct llama_sampling * smpl,
        llama_token_data_array * candidates);

// Advances the grammar state (stacks / partial_utf8 / lazy-trigger state)
// with an accepted token.
void llama_grammar_accept_token_impl(
        struct llama_grammar * grammar,
        const struct llama_vocab * vocab,
        const struct llama_sampling * smpl,
        llama_token token);

// Advances the grammar state with a raw text piece (used e.g. when feeding
// buffered output after a lazy trigger fires — confirm against .cpp).
void llama_grammar_accept_str(
        struct llama_grammar* grammar,
        const std::string& piece);