OAI: Clarify types in docs

Adding field descriptions shows which parameters are used solely for
OAI compliance and are not actually parsed in the model code.

Signed-off-by: kingbri <bdashore3@proton.me>
This commit is contained in:
kingbri
2023-12-17 14:20:56 -05:00
committed by Brian Dashore
parent 51ca1ff396
commit e895eaa4bd
2 changed files with 10 additions and 10 deletions

View File

@@ -19,13 +19,13 @@ class CommonCompletionRequest(BaseModel):
model: Optional[str] = None
# Extra OAI request stuff
best_of: Optional[int] = None
echo: Optional[bool] = False
logit_bias: Optional[Dict[str, float]] = None
logprobs: Optional[int] = None
n: Optional[int] = 1
suffix: Optional[str] = None
user: Optional[str] = None
best_of: Optional[int] = Field(description = "Not parsed. Only used for OAI compliance.", default = None)
echo: Optional[bool] = Field(description = "Not parsed. Only used for OAI compliance.", default = False)
logit_bias: Optional[Dict[str, float]] = Field(description = "Not parsed. Only used for OAI compliance.", default = None)
logprobs: Optional[int] = Field(description = "Not parsed. Only used for OAI compliance.", default = None)
n: Optional[int] = Field(description = "Not parsed. Only used for OAI compliance.", default = 1)
suffix: Optional[str] = Field(description = "Not parsed. Only used for OAI compliance.", default = None)
user: Optional[str] = Field(description = "Not parsed. Only used for OAI compliance.", default = None)
# Generation info
# seed: Optional[int] = -1
@@ -36,7 +36,7 @@ class CommonCompletionRequest(BaseModel):
max_tokens: Optional[int] = 150
# Aliased to repetition_penalty
frequency_penalty: Optional[float] = 0.0
frequency_penalty: Optional[float] = Field(description = "Aliased to Repetition Penalty", default = 0.0)
# Sampling params
token_healing: Optional[bool] = False

View File

@@ -27,7 +27,7 @@ class ModelList(BaseModel):
class DraftModelLoadRequest(BaseModel):
draft_model_name: str
draft_rope_scale: Optional[float] = 1.0
draft_rope_alpha: Optional[float] = None
draft_rope_alpha: Optional[float] = Field(description = "Automatically calculated if not present", default = None)
# TODO: Unify this with ModelCardParams
class ModelLoadRequest(BaseModel):
@@ -36,7 +36,7 @@ class ModelLoadRequest(BaseModel):
gpu_split_auto: Optional[bool] = True
gpu_split: Optional[List[float]] = Field(default_factory=list)
rope_scale: Optional[float] = 1.0
rope_alpha: Optional[float] = None
rope_alpha: Optional[float] = Field(description = "Automatically calculated if not present", default = None)
no_flash_attention: Optional[bool] = False
# low_mem: Optional[bool] = False
cache_mode: Optional[str] = "FP16"