Files
ik_llama.cpp/tests/test-json-schema-to-grammar.cpp
firecoperana 52adcf1e90 Update grammar (#1023)
* grammar : fix JSON Schema for string regex with top-level alt. (#9903)

Prior to this commit, using a JSON Schema containing a string
with `pattern` regular expression that uses top-level alternation
(e.g. `"pattern": "^A|B|C|D$"`) would result in invalid JSON
output from the constrained sampling grammar, because it
ended up creating a grammar rule like this for the string:

```
thing ::= "\"" "A" | "B" | "C" | "D" "\"" space
```

Note that this rule will only match a starting quote for the "A" case,
and will only match an ending quote for the "D" case,
so this rule will always produce invalid JSON when used for sampling
(that is, the JSON will always be lacking the starting quote,
the ending quote, or both).

This was fixed in a simple way by adding parentheses to the
generated rule (for all string pattern rules, to keep it simple),
such that the new generated rule looks like this (correct):

```
thing ::= "\"" ("A" | "B" | "C" | "D") "\"" space
```

* grammars : add English-only grammar (#10612)

* grammar : handle maxItems == 0 in JSON schema (#13117)

Co-authored-by: Richard Lyons <frob@cloudstaff.com>

* grammar-parser : fix possible null-deref (#9004)

Fixes: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=70680

Signed-off-by: David Korczynski <david@adalogics.com>

* llama : fix typo in llama-grammar.h [no ci] (#11816)

* * server: fix "--grammar-file" parameter (#12285)

* common : use std::string_view now that we target c++17 (#14319)

* json : support `enum` values within `allOf` (#15830)

* grammar : use int64_t to avoid int overflows in int schema to grammar conversion logic (#16626)

* grammar : support array references in json schema (#16792)

* grammar : support array references in json schema

* Update json-schema-to-grammar.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* grammar : improve regex when naming ref derived rules

* grammar : replace non-conformant definitions array with anyOf test case

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
# Conflicts:
#	tests/test-json-schema-to-grammar.cpp

* merge fix

* llama : minor grammar refactor (#10897)

* llama: fix error on bad grammar (#12628)

* grammar : fix integer overflow (#17381)

* Fix DoS / integer overflow

* Remove optional, use INT64_MAX instead as placeholder value (it's technically -1, so it fits :)

* White space

* Actually, since it's unsigned, use UINT64_MAX
# Conflicts:
#	src/llama-grammar.cpp

* grammar: fix regression caused by #17381 (#17412)

* grammar: fix regression caused by #17381

* more readable
# Conflicts:
#	src/llama-grammar.cpp

* Merge Fix

* Fix warnings

---------

Signed-off-by: David Korczynski <david@adalogics.com>
Co-authored-by: Joe Eli McIlvain <joe.eli.mac@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: frob <rick+github@frob.com.au>
Co-authored-by: Richard Lyons <frob@cloudstaff.com>
Co-authored-by: DavidKorczynski <david@adalogics.com>
Co-authored-by: Daniel Bevenius <daniel.bevenius@gmail.com>
Co-authored-by: firecoperana <firecoperana>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
Co-authored-by: Aldehir Rojas <hello@alde.dev>
Co-authored-by: Olivier Chafik <olivier.chafik@gmail.com>
Co-authored-by: Piotr Wilkin (ilintar) <piotr.wilkin@syndatis.com>
Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2025-11-30 18:45:38 +01:00

1386 lines
42 KiB
C++
Executable File

#ifdef NDEBUG
#undef NDEBUG
#endif
#include <cassert>
#include <fstream>
#include <sstream>
#include <regex>
#include "json-schema-to-grammar.h"
#include "grammar-parser.h"
static std::string trim(const std::string & source) {
std::string s(source);
s.erase(0,s.find_first_not_of(" \n\r\t"));
s.erase(s.find_last_not_of(" \n\r\t")+1);
return std::regex_replace(s, std::regex("(^|\n)[ \t]+"), "$1");
}
enum TestCaseStatus {
SUCCESS,
FAILURE
};
struct TestCase {
TestCaseStatus expected_status;
std::string name;
std::string schema;
std::string expected_grammar;
void _print_failure_header() const {
fprintf(stderr, "#\n# Test '%s' failed.\n#\n%s\n", name.c_str(), schema.c_str());
}
void verify(const std::string & actual_grammar) const {
if (trim(actual_grammar) != trim(expected_grammar)) {
_print_failure_header();
fprintf(stderr, "# EXPECTED:\n%s\n# ACTUAL:\n%s\n", expected_grammar.c_str(), actual_grammar.c_str());
assert(false);
}
}
void verify_expectation_parseable() const {
try {
auto state = grammar_parser::parse(expected_grammar.c_str());
if (state.symbol_ids.find("root") == state.symbol_ids.end()) {
throw std::runtime_error("Grammar failed to parse:\n" + expected_grammar);
}
} catch (const std::runtime_error & ex) {
_print_failure_header();
fprintf(stderr, "# GRAMMAR ERROR: %s\n", ex.what());
assert(false);
}
}
void verify_status(TestCaseStatus status) const {
if (status != expected_status) {
_print_failure_header();
fprintf(stderr, "# EXPECTED STATUS: %s\n", expected_status == SUCCESS ? "SUCCESS" : "FAILURE");
fprintf(stderr, "# ACTUAL STATUS: %s\n", status == SUCCESS ? "SUCCESS" : "FAILURE");
assert(false);
}
}
};
static void write(const std::string & file, const std::string & content) {
std::ofstream f;
f.open(file.c_str());
f << content.c_str();
f.close();
}
static std::string read(const std::string & file) {
std::ostringstream actuals;
actuals << std::ifstream(file.c_str()).rdbuf();
return actuals.str();
}
static void test_all(const std::string & lang, std::function<void(const TestCase &)> runner) {
fprintf(stderr, "#\n# Testing JSON schema conversion (%s)\n#\n", lang.c_str());
auto test = [&](const TestCase & tc) {
fprintf(stderr, "- %s%s\n", tc.name.c_str(), tc.expected_status == FAILURE ? " (failure expected)" : "");
runner(tc);
};
test({
SUCCESS,
"min 0",
R"""({
"type": "integer",
"minimum": 0
})""",
R"""(
root ::= ([0] | [1-9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 1",
R"""({
"type": "integer",
"minimum": 1
})""",
R"""(
root ::= ([1-9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 3",
R"""({
"type": "integer",
"minimum": 3
})""",
R"""(
root ::= ([1-2] [0-9]{1,15} | [3-9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 9",
R"""({
"type": "integer",
"minimum": 9
})""",
R"""(
root ::= ([1-8] [0-9]{1,15} | [9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 10",
R"""({
"type": "integer",
"minimum": 10
})""",
R"""(
root ::= ([1] ([0-9]{1,15}) | [2-9] [0-9]{1,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 25",
R"""({
"type": "integer",
"minimum": 25
})""",
R"""(
root ::= ([1] [0-9]{2,15} | [2] ([0-4] [0-9]{1,14} | [5-9] [0-9]{0,14}) | [3-9] [0-9]{1,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"max 30",
R"""({
"type": "integer",
"maximum": 30
})""",
R"""(
root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-2] [0-9] | [3] "0")) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min -5",
R"""({
"type": "integer",
"minimum": -5
})""",
R"""(
root ::= ("-" ([0-5]) | [0] | [1-9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min -123",
R"""({
"type": "integer",
"minimum": -123
})""",
R"""(
root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0] | [1-9] [0-9]{0,15}) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"max -5",
R"""({
"type": "integer",
"maximum": -5
})""",
R"""(
root ::= ("-" ([0-4] [0-9]{1,15} | [5-9] [0-9]{0,15})) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"max 1",
R"""({
"type": "integer",
"maximum": 1
})""",
R"""(
root ::= ("-" [1-9] [0-9]{0,15} | [0-1]) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"max 100",
R"""({
"type": "integer",
"maximum": 100
})""",
R"""(
root ::= ("-" [1-9] [0-9]{0,15} | [0-9] | ([1-8] [0-9] | [9] [0-9]) | "100") space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 0 max 23",
R"""({
"type": "integer",
"minimum": 0,
"maximum": 23
})""",
R"""(
root ::= ([0-9] | ([1] [0-9] | [2] [0-3])) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 15 max 300",
R"""({
"type": "integer",
"minimum": 15,
"maximum": 300
})""",
R"""(
root ::= (([1] ([5-9]) | [2-9] [0-9]) | ([1-2] [0-9]{2} | [3] "00")) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min 5 max 30",
R"""({
"type": "integer",
"minimum": 5,
"maximum": 30
})""",
R"""(
root ::= ([5-9] | ([1-2] [0-9] | [3] "0")) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min -123 max 42",
R"""({
"type": "integer",
"minimum": -123,
"maximum": 42
})""",
R"""(
root ::= ("-" ([0-9] | ([1-8] [0-9] | [9] [0-9]) | "1" ([0-1] [0-9] | [2] [0-3])) | [0-9] | ([1-3] [0-9] | [4] [0-2])) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min -10 max 10",
R"""({
"type": "integer",
"minimum": -10,
"maximum": 10
})""",
R"""(
root ::= ("-" ([0-9] | "10") | [0-9] | "10") space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
FAILURE,
"unknown type",
R"""({
"type": "kaboom"
})""",
""
});
test({
FAILURE,
"invalid type",
R"""({
"type": 123
})""",
""
});
test({
SUCCESS,
"empty schema (object)",
"{}",
R"""(
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
});
test({
SUCCESS,
"exotic formats",
R"""({
"items": [
{ "format": "date" },
{ "format": "uuid" },
{ "format": "time" },
{ "format": "date-time" }
]
})""",
R"""(
date ::= [0-9]{4} "-" ( "0" [1-9] | "1" [0-2] ) "-" ( "0" [1-9] | [1-2] [0-9] | "3" [0-1] )
date-string ::= "\"" date "\"" space
date-time ::= date "T" time
date-time-string ::= "\"" date-time "\"" space
root ::= "[" space tuple-0 "," space uuid "," space tuple-2 "," space tuple-3 "]" space
space ::= | " " | "\n" [ \t]{0,20}
time ::= ([01] [0-9] | "2" [0-3]) ":" [0-5] [0-9] ":" [0-5] [0-9] ( "." [0-9]{3} )? ( "Z" | ( "+" | "-" ) ( [01] [0-9] | "2" [0-3] ) ":" [0-5] [0-9] )
time-string ::= "\"" time "\"" space
tuple-0 ::= date-string
tuple-2 ::= time-string
tuple-3 ::= date-time-string
uuid ::= "\"" [0-9a-fA-F]{8} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{4} "-" [0-9a-fA-F]{12} "\"" space
)"""
});
test({
SUCCESS,
"string",
R"""({
"type": "string"
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char* "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string w/ min length 1",
R"""({
"type": "string",
"minLength": 1
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char+ "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string w/ min length 3",
R"""({
"type": "string",
"minLength": 3
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{3,} "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string w/ max length",
R"""({
"type": "string",
"maxLength": 3
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{0,3} "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string w/ min & max length",
R"""({
"type": "string",
"minLength": 1,
"maxLength": 4
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "\"" char{1,4} "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"boolean",
R"""({
"type": "boolean"
})""",
R"""(
root ::= ("true" | "false") space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"integer",
R"""({
"type": "integer"
})""",
R"""(
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= ("-"? integral-part) space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string const",
R"""({
"const": "foo"
})""",
R"""(
root ::= "\"foo\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"non-string const",
R"""({
"const": 123
})""",
R"""(
root ::= "123" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"non-string enum",
R"""({
"enum": ["red", "amber", "green", null, 42, ["foo"]]
})""",
R"""(
root ::= ("\"red\"" | "\"amber\"" | "\"green\"" | "null" | "42" | "[\"foo\"]") space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"string array",
R"""({
"type": "array",
"prefixItems": { "type": "string" }
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "[" space (string ("," space string)*)? "]" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"nullable string array",
R"""({
"type": ["array", "null"],
"prefixItems": { "type": "string" }
})""",
R"""(
alternative-0 ::= "[" space (string ("," space string)*)? "]" space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
null ::= "null" space
root ::= alternative-0 | null
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"tuple1",
R"""({
"prefixItems": [{ "type": "string" }]
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "[" space string "]" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"tuple2",
R"""({
"prefixItems": [{ "type": "string" }, { "type": "number" }]
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space string "," space number "]" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"number",
R"""({
"type": "number"
})""",
R"""(
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"minItems",
R"""({
"items": {
"type": "boolean"
},
"minItems": 2
})""",
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space boolean ("," space boolean)+ "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"maxItems 0",
R"""({
"items": {
"type": "boolean"
},
"maxItems": 0
})""",
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space "]" space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
)"""
});
test({
SUCCESS,
"maxItems 1",
R"""({
"items": {
"type": "boolean"
},
"maxItems": 1
})""",
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space boolean? "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"maxItems 2",
R"""({
"items": {
"type": "boolean"
},
"maxItems": 2
})""",
R"""(
boolean ::= ("true" | "false") space
root ::= "[" space (boolean ("," space boolean)?)? "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min + maxItems",
R"""({
"items": {
"type": ["number", "integer"]
},
"minItems": 3,
"maxItems": 5
})""",
R"""(
decimal-part ::= [0-9]{1,16}
integer ::= ("-"? integral-part) space
integral-part ::= [0] | [1-9] [0-9]{0,15}
item ::= number | integer
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "[" space item ("," space item){2,4} "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min + max items with min + max values across zero",
R"""({
"items": {
"type": "integer",
"minimum": -12,
"maximum": 207
},
"minItems": 3,
"maxItems": 5
})""",
R"""(
item ::= ("-" ([0-9] | "1" [0-2]) | [0-9] | ([1-8] [0-9] | [9] [0-9]) | ([1] [0-9]{2} | [2] "0" [0-7])) space
root ::= "[" space item ("," space item){2,4} "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"min + max items with min + max values",
R"""({
"items": {
"type": "integer",
"minimum": 12,
"maximum": 207
},
"minItems": 3,
"maxItems": 5
})""",
R"""(
item ::= (([1] ([2-9]) | [2-9] [0-9]) | ([1] [0-9]{2} | [2] "0" [0-7])) space
root ::= "[" space item ("," space item){2,4} "]" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"simple regexp",
R"""({
"type": "string",
"pattern": "^abc?d*efg+(hij)?kl$"
})""",
R"""(
root ::= "\"" ("ab" "c"? "d"* "ef" "g"+ ("hij")? "kl") "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"regexp escapes",
R"""({
"type": "string",
"pattern": "^\\[\\]\\{\\}\\(\\)\\|\\+\\*\\?$"
})""",
R"""(
root ::= "\"" ("[]{}()|+*?") "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"regexp quote",
R"""({
"type": "string",
"pattern": "^\"$"
})""",
R"""(
root ::= "\"" ("\"") "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"regexp with top-level alternation",
R"""({
"type": "string",
"pattern": "^A|B|C|D$"
})""",
R"""(
root ::= "\"" ("A" | "B" | "C" | "D") "\"" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"regexp",
R"""({
"type": "string",
"pattern": "^(\\([0-9]{1,3}\\))?[0-9]{3}-[0-9]{4} a{3,5}nd...$"
})""",
R"""(
dot ::= [^\x0A\x0D]
root ::= "\"" (("(" root-1{1,3} ")")? root-1{3,3} "-" root-1{4,4} " " "a"{3,5} "nd" dot dot dot) "\"" space
root-1 ::= [0-9]
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"required props in original order",
R"""({
"type": "object",
"properties": {
"b": {"type": "string"},
"c": {"type": "string"},
"a": {"type": "string"}
},
"required": [
"a",
"b",
"c"
],
"additionalProperties": false,
"definitions": {}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space b-kv "," space c-kv "," space a-kv "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"1 optional prop",
R"""({
"properties": {
"a": {
"type": "string"
}
},
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space (a-kv )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"N optional props",
R"""({
"properties": {
"a": {"type": "string"},
"b": {"type": "string"},
"c": {"type": "string"}
},
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
a-rest ::= ( "," space b-kv )? b-rest
b-kv ::= "\"b\"" space ":" space string
b-rest ::= ( "," space c-kv )?
c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
root ::= "{" space (a-kv a-rest | b-kv b-rest | c-kv )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"required + optional props each in original order",
R"""({
"properties": {
"b": {"type": "string"},
"a": {"type": "string"},
"d": {"type": "string"},
"c": {"type": "string"}
},
"required": ["a", "b"],
"additionalProperties": false
})""",
R"""(
a-kv ::= "\"a\"" space ":" space string
b-kv ::= "\"b\"" space ":" space string
c-kv ::= "\"c\"" space ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
d-kv ::= "\"d\"" space ":" space string
d-rest ::= ( "," space c-kv )?
root ::= "{" space b-kv "," space a-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"additional props",
R"""({
"type": "object",
"additionalProperties": {"type": "array", "items": {"type": "number"}}
})""",
R"""(
additional-kv ::= string ":" space additional-value
additional-value ::= "[" space (number ("," space number)*)? "]" space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"additional props (true)",
R"""({
"type": "object",
"additionalProperties": true
})""",
R"""(
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
});
test({
SUCCESS,
"additional props (implicit)",
R"""({
"type": "object"
})""",
R"""(
array ::= "[" space ( value ("," space value)* )? "]" space
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
null ::= "null" space
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
object ::= "{" space ( string ":" space value ("," space string ":" space value)* )? "}" space
root ::= object
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
value ::= object | array | string | number | boolean | null
)"""
});
test({
SUCCESS,
"empty w/o additional props",
R"""({
"type": "object",
"additionalProperties": false
})""",
R"""(
root ::= "{" space "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"required + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"required": ["a"],
"additionalProperties": {"type": "string"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space string
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv ( "," space ( additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"optional + additional props",
R"""({
"type": "object",
"properties": {
"a": {"type": "number"}
},
"additionalProperties": {"type": "number"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
a-rest ::= ( "," space additional-kv )*
additional-k ::= ["] ( [a] char+ | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"required + optional + additional props",
R"""({
"type": "object",
"properties": {
"and": {"type": "number"},
"also": {"type": "number"}
},
"required": ["and"],
"additionalProperties": {"type": "number"}
})""",
R"""(
additional-k ::= ["] ( [a] ([l] ([s] ([o] char+ | [^"o] char*) | [^"s] char*) | [n] ([d] char+ | [^"d] char*) | [^"ln] char*) | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space number
also-kv ::= "\"also\"" space ":" space number
also-rest ::= ( "," space additional-kv )*
and-kv ::= "\"and\"" space ":" space number
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space and-kv ( "," space ( also-kv also-rest | additional-kv ( "," space additional-kv )* ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"optional props with empty name",
R"""({
"properties": {
"": {"type": "integer"},
"a": {"type": "integer"}
},
"additionalProperties": {"type": "integer"}
})""",
R"""(
-kv ::= "\"\"" space ":" space root
-rest ::= ( "," space a-kv )? a-rest
a-kv ::= "\"a\"" space ":" space integer
a-rest ::= ( "," space additional-kv )*
additional-k ::= ["] ( [a] char+ | [^"a] char* ) ["] space
additional-kv ::= additional-k ":" space integer
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
integer ::= ("-"? integral-part) space
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= ("-"? integral-part) space
root0 ::= "{" space (-kv -rest | a-kv a-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"optional props with nested names",
R"""({
"properties": {
"a": {"type": "integer"},
"aa": {"type": "integer"}
},
"additionalProperties": {"type": "integer"}
})""",
R"""(
a-kv ::= "\"a\"" space ":" space integer
a-rest ::= ( "," space aa-kv )? aa-rest
aa-kv ::= "\"aa\"" space ":" space integer
aa-rest ::= ( "," space additional-kv )*
additional-k ::= ["] ( [a] ([a] char+ | [^"a] char*) | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space integer
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
integer ::= ("-"? integral-part) space
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= "{" space (a-kv a-rest | aa-kv aa-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"optional props with common prefix",
R"""({
"properties": {
"ab": {"type": "integer"},
"ac": {"type": "integer"}
},
"additionalProperties": {"type": "integer"}
})""",
R"""(
ab-kv ::= "\"ab\"" space ":" space integer
ab-rest ::= ( "," space ac-kv )? ac-rest
ac-kv ::= "\"ac\"" space ":" space integer
ac-rest ::= ( "," space additional-kv )*
additional-k ::= ["] ( [a] ([b] char+ | [c] char+ | [^"bc] char*) | [^"a] char* )? ["] space
additional-kv ::= additional-k ":" space integer
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
integer ::= ("-"? integral-part) space
integral-part ::= [0] | [1-9] [0-9]{0,15}
root ::= "{" space (ab-kv ab-rest | ac-kv ac-rest | additional-kv ( "," space additional-kv )* )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"top-level $ref",
R"""({
"$ref": "#/definitions/foo",
"definitions": {
"foo": {
"type": "object",
"properties": {
"a": {
"type": "string"
}
},
"required": [
"a"
],
"additionalProperties": false
}
}
})""",
R"""(
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string
root ::= ref-definitions-foo
space ::= | " " | "\n" [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"anyOf",
R"""({
"anyOf": [
{"$ref": "#/definitions/foo"},
{"$ref": "#/definitions/bar"}
],
"definitions": {
"foo": {
"properties": {"a": {"type": "number"}}
},
"bar": {
"properties": {"b": {"type": "number"}}
}
},
"type": "object"
})""",
R"""(
alternative-0 ::= ref-definitions-foo
alternative-1 ::= ref-definitions-bar
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
ref-definitions-bar ::= "{" space (ref-definitions-bar-b-kv )? "}" space
ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number
ref-definitions-foo ::= "{" space (ref-definitions-foo-a-kv )? "}" space
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number
root ::= alternative-0 | alternative-1
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"anyOf $ref",
R"""({
"properties": {
"a": {
"anyOf": [
{"type": "string"},
{"type": "number"}
]
},
"b": {
"anyOf": [
{"$ref": "#/properties/a/anyOf/0"},
{"type": "boolean"}
]
}
},
"type": "object"
})""",
R"""(
a ::= string | number
a-kv ::= "\"a\"" space ":" space a
a-rest ::= ( "," space b-kv )?
b ::= b-0 | boolean
b-0 ::= string
b-kv ::= "\"b\"" space ":" space b
boolean ::= ("true" | "false") space
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space (a-kv a-rest | b-kv )? "}" space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
string ::= "\"" char* "\"" space
)"""
});
test({
SUCCESS,
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
R"""({
"allOf": [
{"$ref": "#/definitions/foo"},
{"$ref": "#/definitions/bar"},
{
"anyOf": [
{"$ref": "#/definitions/baz"},
{"$ref": "#/definitions/bam"}
]
}
],
"definitions": {
"foo": {
"properties": {"a": {"type": "number"}}
},
"bar": {
"properties": {"b": {"type": "number"}}
},
"bam": {
"properties": {"c": {"type": "number"}}
},
"baz": {
"properties": {"d": {"type": "number"}}
}
},
"type": "object"
})""",
R"""(
a-kv ::= "\"a\"" space ":" space number
b-kv ::= "\"b\"" space ":" space number
c-kv ::= "\"c\"" space ":" space number
d-kv ::= "\"d\"" space ":" space number
d-rest ::= ( "," space c-kv )?
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
root ::= "{" space a-kv "," space b-kv ( "," space ( d-kv d-rest | c-kv ) )? "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
test({
SUCCESS,
"allOf with enum schema",
R"""({
"allOf": [
{"$ref": "#/definitions/foo"}
],
"definitions": {
"foo": {
"type": "string",
"enum": ["a", "b"]
}
}
})""",
R"""(
root ::= ("\"a\"" | "\"b\"") space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
)"""
});
test({
SUCCESS,
"allOf with multiple enum schemas",
R"""({
"allOf": [
{"$ref": "#/definitions/foo"},
{"$ref": "#/definitions/bar"}
],
"definitions": {
"foo": {
"type": "string",
"enum": ["a", "b", "c"]
},
"bar": {
"type": "string",
"enum": ["b", "c", "d"]
}
}
})""",
R"""(
root ::= ("\"b\"" | "\"c\"") space
space ::= | " " | "\n"{1,2} [ \t]{0,20}
)"""
});
test({
SUCCESS,
"conflicting names",
R"""({
"type": "object",
"properties": {
"number": {
"type": "object",
"properties": {
"number": {
"type": "object",
"properties": {
"root": {
"type": "number"
}
},
"required": [
"root"
],
"additionalProperties": false
}
},
"required": [
"number"
],
"additionalProperties": false
}
},
"required": [
"number"
],
"additionalProperties": false,
"definitions": {}
})""",
R"""(
decimal-part ::= [0-9]{1,16}
integral-part ::= [0] | [1-9] [0-9]{0,15}
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
number- ::= "{" space number-number-kv "}" space
number-kv ::= "\"number\"" space ":" space number-
number-number ::= "{" space number-number-root-kv "}" space
number-number-kv ::= "\"number\"" space ":" space number-number
number-number-root-kv ::= "\"root\"" space ":" space number
root ::= "{" space number-kv "}" space
space ::= | " " | "\n" [ \t]{0,20}
)"""
});
}
int main() {
fprintf(stderr, "LLAMA_NODE_AVAILABLE = %s\n", getenv("LLAMA_NODE_AVAILABLE") ? "true" : "false");
fprintf(stderr, "LLAMA_PYTHON_AVAILABLE = %s\n", getenv("LLAMA_PYTHON_AVAILABLE") ? "true" : "false");
test_all("C++", [](const TestCase & tc) {
try {
tc.verify(json_schema_to_grammar(nlohmann::ordered_json::parse(tc.schema), true));
tc.verify_status(SUCCESS);
} catch (const std::runtime_error & ex) {
fprintf(stderr, "Error: %s\n", ex.what());
tc.verify_status(FAILURE);
}
});
if (getenv("LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR")) {
fprintf(stderr, "\033[33mWARNING: Skipping slow tests on emulator.\n\033[0m");
} else {
if (getenv("LLAMA_PYTHON_AVAILABLE") || (std::system("python -c \"import sys; exit(1) if sys.version_info < (3, 8) else print('Python version is sufficient')\"") == 0)) {
test_all("Python", [](const TestCase & tc) {
write("test-json-schema-input.tmp", tc.schema);
tc.verify_status(std::system(
"python ./examples/json_schema_to_grammar.py test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
tc.verify(read("test-grammar-output.tmp"));
});
} else {
fprintf(stderr, "\033[33mWARNING: Python not found (min version required is 3.8), skipping Python JSON schema -> grammar tests.\n\033[0m");
}
if (getenv("LLAMA_NODE_AVAILABLE") || (std::system("node --version") == 0)) {
test_all("JavaScript", [](const TestCase & tc) {
write("test-json-schema-input.tmp", tc.schema);
tc.verify_status(std::system(
"node ./tests/run-json-schema-to-grammar.mjs test-json-schema-input.tmp > test-grammar-output.tmp") == 0 ? SUCCESS : FAILURE);
tc.verify(read("test-grammar-output.tmp"));
});
} else {
fprintf(stderr, "\033[33mWARNING: Node not found, skipping JavaScript JSON schema -> grammar tests.\n\033[0m");
}
}
test_all("Check Expectations Validity", [](const TestCase & tc) {
if (tc.expected_status == SUCCESS) {
tc.verify_expectation_parseable();
}
});
}