Mirror of https://github.com/theroyallab/tabbyAPI.git
API: Migrate universal routes to core
Place OAI-specific routes in the appropriate folder. This is in preparation for adding new API servers that can be optionally enabled.

Signed-off-by: kingbri <bdashore3@proton.me>
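A rough sketch of the layout this migration points toward (a FastAPI-style sketch; module, route, and function names are illustrative assumptions, not taken from the repo): universal routes are collected on a core router, while OAI-specific routes live on their own router that a server mounts only when that API is enabled.

from fastapi import APIRouter, FastAPI

core_router = APIRouter()              # universal routes: health, auth, model management
oai_router = APIRouter(prefix="/v1")   # OpenAI-compatible routes


@core_router.get("/health")
async def health():
    # Hypothetical universal endpoint, for illustration only
    return {"status": "ok"}


def build_app(enable_oai: bool = True) -> FastAPI:
    # Core routes are always mounted; optional API servers opt in
    app = FastAPI()
    app.include_router(core_router)
    if enable_oai:
        app.include_router(oai_router)
    return app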
@@ -1,7 +0,0 @@
"""Types for auth requests."""

from pydantic import BaseModel


class AuthPermissionResponse(BaseModel):
    permission: str
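For reference, a one-line usage sketch (assuming pydantic v2, which the ConfigDict usage later in this commit implies):

AuthPermissionResponse(permission="admin").model_dump()  # {'permission': 'admin'}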
@@ -1,25 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Optional


def _generate_include_list():
    return ["*"]


class DownloadRequest(BaseModel):
    """Parameters for a HuggingFace repo download."""

    repo_id: str
    repo_type: str = "model"
    folder_name: Optional[str] = None
    revision: Optional[str] = None
    token: Optional[str] = None
    include: List[str] = Field(default_factory=_generate_include_list)
    exclude: List[str] = Field(default_factory=list)
    chunk_limit: Optional[int] = None


class DownloadResponse(BaseModel):
    """Response for a download request."""

    download_path: str
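A quick construction sketch showing how the defaults above resolve (pydantic v2 assumed; the repo id is just an example value):

req = DownloadRequest(repo_id="theroyallab/tabbyAPI")
print(req.include)   # ['*'], filled in by _generate_include_list
print(req.model_dump(exclude_none=True))
# {'repo_id': 'theroyallab/tabbyAPI', 'repo_type': 'model', 'include': ['*'], 'exclude': []}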
@@ -1,43 +0,0 @@
"""Lora types"""

from pydantic import BaseModel, Field
from time import time
from typing import Optional, List


class LoraCard(BaseModel):
    """Represents a single Lora card."""

    id: str = "test"
    object: str = "lora"
    created: int = Field(default_factory=lambda: int(time()))
    owned_by: str = "tabbyAPI"
    scaling: Optional[float] = None


class LoraList(BaseModel):
    """Represents a list of Lora cards."""

    object: str = "list"
    data: List[LoraCard] = Field(default_factory=list)


class LoraLoadInfo(BaseModel):
    """Represents a single Lora load info."""

    name: str
    scaling: Optional[float] = 1.0


class LoraLoadRequest(BaseModel):
    """Represents a Lora load request."""

    loras: List[LoraLoadInfo]
    skip_queue: bool = False


class LoraLoadResponse(BaseModel):
    """Represents a Lora load response."""

    success: List[str] = Field(default_factory=list)
    failure: List[str] = Field(default_factory=list)
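A usage sketch for the load request shape (lora names and scales are made up for illustration; pydantic v2 assumed):

payload = {
    "loras": [
        {"name": "writing-style", "scaling": 0.8},
        {"name": "summarizer"},   # omitted scaling keeps the 1.0 default
    ],
    "skip_queue": False,
}
request = LoraLoadRequest.model_validate(payload)
assert request.loras[1].scaling == 1.0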
@@ -1,149 +0,0 @@
"""Contains model card types."""

from pydantic import BaseModel, Field, ConfigDict
from time import time
from typing import List, Optional

from common.gen_logging import GenLogPreferences
from common.model import get_config_default


class ModelCardParameters(BaseModel):
    """Represents model card parameters."""

    # Safe to do this since it's guaranteed to fetch a max seq len
    # from model_container
    max_seq_len: Optional[int] = None
    rope_scale: Optional[float] = 1.0
    rope_alpha: Optional[float] = 1.0
    cache_size: Optional[int] = None
    cache_mode: Optional[str] = "FP16"
    chunk_size: Optional[int] = 2048
    prompt_template: Optional[str] = None
    num_experts_per_token: Optional[int] = None

    # Draft is another model, so include it in the card params
    draft: Optional["ModelCard"] = None


class ModelCard(BaseModel):
    """Represents a single model card."""

    id: str = "test"
    object: str = "model"
    created: int = Field(default_factory=lambda: int(time()))
    owned_by: str = "tabbyAPI"
    logging: Optional[GenLogPreferences] = None
    parameters: Optional[ModelCardParameters] = None


class ModelList(BaseModel):
    """Represents a list of model cards."""

    object: str = "list"
    data: List[ModelCard] = Field(default_factory=list)


class DraftModelLoadRequest(BaseModel):
    """Represents a draft model load request."""

    # Required
    draft_model_name: str

    # Config arguments
    draft_rope_scale: Optional[float] = Field(
        default_factory=lambda: get_config_default(
            "draft_rope_scale", 1.0, is_draft=True
        )
    )
    draft_rope_alpha: Optional[float] = Field(
        description="Automatically calculated if not present",
        default_factory=lambda: get_config_default(
            "draft_rope_alpha", None, is_draft=True
        ),
        examples=[1.0],
    )
    draft_cache_mode: Optional[str] = Field(
        default_factory=lambda: get_config_default(
            "draft_cache_mode", "FP16", is_draft=True
        )
    )


class ModelLoadRequest(BaseModel):
    """Represents a model load request."""

    # Required
    name: str

    # Config arguments

    # Max seq len is fetched from config.json of the model by default
    max_seq_len: Optional[int] = Field(
        description="Leave this blank to use the model's base sequence length",
        default_factory=lambda: get_config_default("max_seq_len"),
        examples=[4096],
    )
    override_base_seq_len: Optional[int] = Field(
        description=(
            "Overrides the model's base sequence length. Leave blank if unsure"
        ),
        default_factory=lambda: get_config_default("override_base_seq_len"),
        examples=[4096],
    )
    cache_size: Optional[int] = Field(
        description="Number in tokens, must be greater than or equal to max_seq_len",
        default_factory=lambda: get_config_default("cache_size"),
        examples=[4096],
    )
    gpu_split_auto: Optional[bool] = Field(
        default_factory=lambda: get_config_default("gpu_split_auto", True)
    )
    autosplit_reserve: Optional[List[float]] = Field(
        default_factory=lambda: get_config_default("autosplit_reserve", [96])
    )
    gpu_split: Optional[List[float]] = Field(
        default_factory=lambda: get_config_default("gpu_split", []),
        examples=[[24.0, 20.0]],
    )
    rope_scale: Optional[float] = Field(
        description="Automatically pulled from the model's config if not present",
        default_factory=lambda: get_config_default("rope_scale"),
        examples=[1.0],
    )
    rope_alpha: Optional[float] = Field(
        description="Automatically calculated if not present",
        default_factory=lambda: get_config_default("rope_alpha"),
        examples=[1.0],
    )
    cache_mode: Optional[str] = Field(
        default_factory=lambda: get_config_default("cache_mode", "FP16")
    )
    chunk_size: Optional[int] = Field(
        default_factory=lambda: get_config_default("chunk_size", 2048)
    )
    prompt_template: Optional[str] = Field(
        default_factory=lambda: get_config_default("prompt_template")
    )
    num_experts_per_token: Optional[int] = Field(
        default_factory=lambda: get_config_default("num_experts_per_token")
    )
    fasttensors: Optional[bool] = Field(
        default_factory=lambda: get_config_default("fasttensors", False)
    )

    # Non-config arguments
    draft: Optional[DraftModelLoadRequest] = None
    skip_queue: Optional[bool] = False


class ModelLoadResponse(BaseModel):
    """Represents a model load response."""

    # Avoids pydantic namespace warning
    model_config = ConfigDict(protected_namespaces=[])

    model_type: str = "model"
    module: int
    modules: int
    status: str
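One point worth noting about these fields: default_factory runs per validation, so get_config_default is consulted each time a request is parsed, and explicit request values always win over config-derived defaults. A minimal sketch (the model name is an illustrative value):

request = ModelLoadRequest(name="Llama-3-8B-exl2", max_seq_len=8192)
print(request.max_seq_len)   # 8192 -- the explicit value, not the config default
print(request.cache_mode)    # falls back to get_config_default("cache_mode", "FP16")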
@@ -1,34 +0,0 @@
from pydantic import BaseModel, Field
from typing import List, Optional

from common.sampling import SamplerOverridesContainer


class SamplerOverrideListResponse(SamplerOverridesContainer):
    """Sampler override list response"""

    presets: Optional[List[str]]


class SamplerOverrideSwitchRequest(BaseModel):
    """Sampler override switch request"""

    preset: Optional[str] = Field(
        default=None, description="Pass a sampler override preset name"
    )

    overrides: Optional[dict] = Field(
        default=None,
        description=(
            "Sampling override parent takes in individual keys and overrides. "
            "Ignored if preset is provided."
        ),
        examples=[
            {
                "top_p": {
                    "override": 1.5,
                    "force": False,
                }
            }
        ],
    )
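A request sketch matching the embedded examples above; preset and overrides are effectively mutually exclusive, since overrides are ignored whenever a preset is given:

switch = SamplerOverrideSwitchRequest(
    overrides={"top_p": {"override": 1.5, "force": False}}
)
assert switch.preset is None   # no preset, so the raw overrides dict applies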
@@ -1,15 +0,0 @@
from pydantic import BaseModel, Field
from typing import List


class TemplateList(BaseModel):
    """Represents a list of templates."""

    object: str = "list"
    data: List[str] = Field(default_factory=list)


class TemplateSwitchRequest(BaseModel):
    """Request to switch a template."""

    name: str
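Both shapes are trivial to exercise (template names here are illustrative assumptions):

TemplateList(data=["alpaca", "chatml"]).model_dump()
# {'object': 'list', 'data': ['alpaca', 'chatml']}
TemplateSwitchRequest(name="chatml")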
@@ -1,51 +0,0 @@
"""Tokenization types"""

from pydantic import BaseModel
from typing import Dict, List, Union


class CommonTokenRequest(BaseModel):
    """Represents a common tokenization request."""

    add_bos_token: bool = True
    encode_special_tokens: bool = True
    decode_special_tokens: bool = True

    def get_params(self):
        """Get the parameters for tokenization."""
        return {
            "add_bos_token": self.add_bos_token,
            "encode_special_tokens": self.encode_special_tokens,
            "decode_special_tokens": self.decode_special_tokens,
        }


class TokenEncodeRequest(CommonTokenRequest):
    """Represents a tokenization request."""

    text: Union[str, List[Dict[str, str]]]


class TokenEncodeResponse(BaseModel):
    """Represents a tokenization response."""

    tokens: List[int]
    length: int


class TokenDecodeRequest(CommonTokenRequest):
    """Represents a detokenization request."""

    tokens: List[int]


class TokenDecodeResponse(BaseModel):
    """Represents a detokenization response."""

    text: str


class TokenCountResponse(BaseModel):
    """Represents a token count response."""

    length: int
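A sketch of the request side (pydantic v2 assumed; note that text accepts either a raw string or a chat-style list of message dicts, per the Union above):

encode = TokenEncodeRequest(text="Hello, world!", add_bos_token=False)
print(encode.get_params())
# {'add_bos_token': False, 'encode_special_tokens': True, 'decode_special_tokens': True}

TokenEncodeRequest(text=[{"role": "user", "content": "Hi"}])  # also valid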