Merge branch 'main' into main

Authored by Bartowski on 2024-08-14 16:16:15 -04:00; committed by GitHub.
56 changed files with 2609 additions and 888 deletions

@@ -1,7 +0,0 @@
"""Types for auth requests."""
from pydantic import BaseModel
class AuthPermissionResponse(BaseModel):
permission: str

@@ -65,3 +65,4 @@ class ChatCompletionStreamChunk(BaseModel):
    created: int = Field(default_factory=lambda: int(time()))
    model: str
    object: str = "chat.completion.chunk"
    usage: Optional[UsageStats] = None
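
For context, a minimal sketch (not part of the diff) of how a client might read the new usage field, assuming each parsed SSE chunk is a dict shaped like ChatCompletionStreamChunk:

# Illustrative helper: usage stays None on every chunk except the
# final one when the client opted in via stream_options.include_usage.
def last_usage(chunks):
    usage = None
    for chunk in chunks:
        if chunk.get("usage") is not None:
            usage = chunk["usage"]
    return usage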

@@ -18,6 +18,10 @@ class CompletionResponseFormat(BaseModel):
    type: str = "text"


class ChatCompletionStreamOptions(BaseModel):
    include_usage: Optional[bool] = False


class CommonCompletionRequest(BaseSamplerRequest):
    """Represents a common completion request."""
@@ -27,6 +31,7 @@ class CommonCompletionRequest(BaseSamplerRequest):
    # Generation info (remainder is in BaseSamplerRequest superclass)
    stream: Optional[bool] = False
    stream_options: Optional[ChatCompletionStreamOptions] = None
    logprobs: Optional[int] = Field(
        default_factory=lambda: get_default_sampler_value("logprobs", 0)
    )
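
A hypothetical request body exercising the new field; the prompt key is assumed to live in BaseSamplerRequest, which this hunk does not show:

# Opting into usage reporting on the final stream chunk.
payload = {
    "prompt": "Hello",  # assumed field from BaseSamplerRequest
    "stream": True,
    "stream_options": {"include_usage": True},
}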

@@ -1,25 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Optional


def _generate_include_list():
    return ["*"]


class DownloadRequest(BaseModel):
    """Parameters for a HuggingFace repo download."""

    repo_id: str
    repo_type: str = "model"
    folder_name: Optional[str] = None
    revision: Optional[str] = None
    token: Optional[str] = None
    include: List[str] = Field(default_factory=_generate_include_list)
    exclude: List[str] = Field(default_factory=list)
    chunk_limit: Optional[int] = None


class DownloadResponse(BaseModel):
    """Response for a download request."""

    download_path: str
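
For reference, a request against the removed schema would have looked like this sketch (the repo name is hypothetical):

# Only repo_id was required; include defaulted to ["*"] via
# _generate_include_list() and exclude to an empty list.
download_request = {
    "repo_id": "example-org/example-model",  # hypothetical repo
    "repo_type": "model",
    "revision": "main",
}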

@@ -0,0 +1,42 @@
from typing import List, Optional

from pydantic import BaseModel, Field


class UsageInfo(BaseModel):
    prompt_tokens: int = 0
    total_tokens: int = 0
    completion_tokens: Optional[int] = 0


class EmbeddingsRequest(BaseModel):
    input: List[str] = Field(
        ..., description="List of input texts to generate embeddings for."
    )
    encoding_format: str = Field(
        "float",
        description="Encoding format for the embeddings. "
        "Can be 'float' or 'base64'.",
    )
    model: Optional[str] = Field(
        None,
        description="Name of the embedding model to use. "
        "If not provided, the default model will be used.",
    )


class EmbeddingObject(BaseModel):
    object: str = Field("embedding", description="Type of the object.")
    embedding: List[float] = Field(
        ..., description="Embedding values as a list of floats."
    )
    index: int = Field(
        ..., description="Index of the input text corresponding to the embedding."
    )


class EmbeddingsResponse(BaseModel):
    object: str = Field("list", description="Type of the response object.")
    data: List[EmbeddingObject] = Field(..., description="List of embedding objects.")
    model: str = Field(..., description="Name of the embedding model used.")
    usage: UsageInfo = Field(..., description="Information about token usage.")
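
A sketch of the request/response shapes these models describe, written as plain dicts; the model name and embedding values are illustrative:

# EmbeddingsRequest: only "input" is required.
request = {
    "input": ["first text", "second text"],
    "encoding_format": "float",
}

# EmbeddingsResponse: one EmbeddingObject per input, index-aligned.
response = {
    "object": "list",
    "data": [
        {"object": "embedding", "embedding": [0.01, -0.02], "index": 0},
        {"object": "embedding", "embedding": [0.03, 0.04], "index": 1},
    ],
    "model": "example-embedding-model",  # hypothetical model name
    "usage": {"prompt_tokens": 8, "total_tokens": 8, "completion_tokens": 0},
}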

@@ -1,43 +0,0 @@
"""Lora types"""
from pydantic import BaseModel, Field
from time import time
from typing import Optional, List
class LoraCard(BaseModel):
"""Represents a single Lora card."""
id: str = "test"
object: str = "lora"
created: int = Field(default_factory=lambda: int(time()))
owned_by: str = "tabbyAPI"
scaling: Optional[float] = None
class LoraList(BaseModel):
"""Represents a list of Lora cards."""
object: str = "list"
data: List[LoraCard] = Field(default_factory=list)
class LoraLoadInfo(BaseModel):
"""Represents a single Lora load info."""
name: str
scaling: Optional[float] = 1.0
class LoraLoadRequest(BaseModel):
"""Represents a Lora load request."""
loras: List[LoraLoadInfo]
skip_queue: bool = False
class LoraLoadResponse(BaseModel):
"""Represents a Lora load response."""
success: List[str] = Field(default_factory=list)
failure: List[str] = Field(default_factory=list)
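
For reference, a load request against the removed schema, with a hypothetical adapter name:

# Each entry in "loras" maps to a LoraLoadInfo; scaling defaults to 1.0.
lora_request = {
    "loras": [{"name": "example-lora", "scaling": 0.8}],  # hypothetical
    "skip_queue": False,
}

# The matching LoraLoadResponse reports per-adapter outcomes.
lora_response = {"success": ["example-lora"], "failure": []}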

@@ -1,149 +0,0 @@
"""Contains model card types."""
from pydantic import BaseModel, Field, ConfigDict
from time import time
from typing import List, Optional
from common.gen_logging import GenLogPreferences
from common.model import get_config_default
class ModelCardParameters(BaseModel):
"""Represents model card parameters."""
# Safe to do this since it's guaranteed to fetch a max seq len
# from model_container
max_seq_len: Optional[int] = None
rope_scale: Optional[float] = 1.0
rope_alpha: Optional[float] = 1.0
cache_size: Optional[int] = None
cache_mode: Optional[str] = "FP16"
chunk_size: Optional[int] = 2048
prompt_template: Optional[str] = None
num_experts_per_token: Optional[int] = None
# Draft is another model, so include it in the card params
draft: Optional["ModelCard"] = None
class ModelCard(BaseModel):
"""Represents a single model card."""
id: str = "test"
object: str = "model"
created: int = Field(default_factory=lambda: int(time()))
owned_by: str = "tabbyAPI"
logging: Optional[GenLogPreferences] = None
parameters: Optional[ModelCardParameters] = None
class ModelList(BaseModel):
"""Represents a list of model cards."""
object: str = "list"
data: List[ModelCard] = Field(default_factory=list)
class DraftModelLoadRequest(BaseModel):
"""Represents a draft model load request."""
# Required
draft_model_name: str
# Config arguments
draft_rope_scale: Optional[float] = Field(
default_factory=lambda: get_config_default(
"draft_rope_scale", 1.0, is_draft=True
)
)
draft_rope_alpha: Optional[float] = Field(
description="Automatically calculated if not present",
default_factory=lambda: get_config_default(
"draft_rope_alpha", None, is_draft=True
),
examples=[1.0],
)
draft_cache_mode: Optional[str] = Field(
default_factory=lambda: get_config_default(
"draft_cache_mode", "FP16", is_draft=True
)
)
class ModelLoadRequest(BaseModel):
"""Represents a model load request."""
# Required
name: str
# Config arguments
# Max seq len is fetched from config.json of the model by default
max_seq_len: Optional[int] = Field(
description="Leave this blank to use the model's base sequence length",
default_factory=lambda: get_config_default("max_seq_len"),
examples=[4096],
)
override_base_seq_len: Optional[int] = Field(
description=(
"Overrides the model's base sequence length. " "Leave blank if unsure"
),
default_factory=lambda: get_config_default("override_base_seq_len"),
examples=[4096],
)
cache_size: Optional[int] = Field(
description=("Number in tokens, must be greater than or equal to max_seq_len"),
default_factory=lambda: get_config_default("cache_size"),
examples=[4096],
)
gpu_split_auto: Optional[bool] = Field(
default_factory=lambda: get_config_default("gpu_split_auto", True)
)
autosplit_reserve: Optional[List[float]] = Field(
default_factory=lambda: get_config_default("autosplit_reserve", [96])
)
gpu_split: Optional[List[float]] = Field(
default_factory=lambda: get_config_default("gpu_split", []),
examples=[[24.0, 20.0]],
)
rope_scale: Optional[float] = Field(
description="Automatically pulled from the model's config if not present",
default_factory=lambda: get_config_default("rope_scale"),
examples=[1.0],
)
rope_alpha: Optional[float] = Field(
description="Automatically calculated if not present",
default_factory=lambda: get_config_default("rope_alpha"),
examples=[1.0],
)
cache_mode: Optional[str] = Field(
default_factory=lambda: get_config_default("cache_mode", "FP16")
)
chunk_size: Optional[int] = Field(
default_factory=lambda: get_config_default("chunk_size", 2048)
)
prompt_template: Optional[str] = Field(
default_factory=lambda: get_config_default("prompt_template")
)
num_experts_per_token: Optional[int] = Field(
default_factory=lambda: get_config_default("num_experts_per_token")
)
fasttensors: Optional[bool] = Field(
default_factory=lambda: get_config_default("fasttensors", False)
)
# Non-config arguments
draft: Optional[DraftModelLoadRequest] = None
skip_queue: Optional[bool] = False
class ModelLoadResponse(BaseModel):
"""Represents a model load response."""
# Avoids pydantic namespace warning
model_config = ConfigDict(protected_namespaces=[])
model_type: str = "model"
module: int
modules: int
status: str
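
For reference, a sketch of a load request against the removed schema. Every field except name falls back to a server config default via get_config_default(); the model and draft names below are hypothetical:

load_request = {
    "name": "example-model",  # hypothetical model directory name
    "max_seq_len": 4096,  # omit to use the model's base sequence length
    "cache_mode": "FP16",
    "gpu_split_auto": True,
    "draft": {  # optional draft model for speculative decoding
        "draft_model_name": "example-draft-model",
        "draft_rope_alpha": 1.0,
    },
}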

@@ -1,34 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Optional

from common.sampling import SamplerOverridesContainer


class SamplerOverrideListResponse(SamplerOverridesContainer):
    """Sampler override list response"""

    presets: Optional[List[str]]


class SamplerOverrideSwitchRequest(BaseModel):
    """Sampler override switch request"""

    preset: Optional[str] = Field(
        default=None, description="Pass a sampler override preset name"
    )
    overrides: Optional[dict] = Field(
        default=None,
        description=(
            "Sampling override parent takes in individual keys and overrides. "
            "Ignored if preset is provided."
        ),
        examples=[
            {
                "top_p": {
                    "override": 1.5,
                    "force": False,
                }
            }
        ],
    )
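
The removed request supported two mutually exclusive styles, sketched below; overrides are ignored whenever preset is set:

# Style 1: name a server-side preset (the name is hypothetical).
by_preset = {"preset": "example-preset"}

# Style 2: pass explicit per-sampler overrides.
by_overrides = {
    "overrides": {"top_p": {"override": 1.5, "force": False}},
}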

@@ -1,15 +0,0 @@
from pydantic import BaseModel, Field
from typing import List


class TemplateList(BaseModel):
    """Represents a list of templates."""

    object: str = "list"
    data: List[str] = Field(default_factory=list)


class TemplateSwitchRequest(BaseModel):
    """Request to switch a template."""

    name: str

@@ -1,51 +0,0 @@
"""Tokenization types"""
from pydantic import BaseModel
from typing import Dict, List, Union
class CommonTokenRequest(BaseModel):
"""Represents a common tokenization request."""
add_bos_token: bool = True
encode_special_tokens: bool = True
decode_special_tokens: bool = True
def get_params(self):
"""Get the parameters for tokenization."""
return {
"add_bos_token": self.add_bos_token,
"encode_special_tokens": self.encode_special_tokens,
"decode_special_tokens": self.decode_special_tokens,
}
class TokenEncodeRequest(CommonTokenRequest):
"""Represents a tokenization request."""
text: Union[str, List[Dict[str, str]]]
class TokenEncodeResponse(BaseModel):
"""Represents a tokenization response."""
tokens: List[int]
length: int
class TokenDecodeRequest(CommonTokenRequest):
""" " Represents a detokenization request."""
tokens: List[int]
class TokenDecodeResponse(BaseModel):
"""Represents a detokenization response."""
text: str
class TokenCountResponse(BaseModel):
"""Represents a token count response."""
length: int
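
For reference, a sketch of the encode/decode round trip these removed schemas described; the token IDs are made up:

# TokenEncodeRequest: the boolean flags default to True, and text may
# also be a list of string-to-string dicts (e.g. chat messages).
encode_request = {"text": "Hello, world!", "add_bos_token": True}

# TokenEncodeResponse with illustrative token IDs.
encode_response = {"tokens": [101, 2023, 2003, 102], "length": 4}

# TokenDecodeRequest inherits the same flags via CommonTokenRequest.
decode_request = {"tokens": encode_response["tokens"]}
decode_response = {"text": "Hello, world!"}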