mirror of
https://github.com/theroyallab/tabbyAPI.git
synced 2026-04-21 14:59:02 +00:00
Revision to paged attention checks (#133)
* Model: Clean up paged attention checks
* Model: Move cache_size checks after paged attn checks — cache size is only relevant in paged mode
* Model: Fix no_flash_attention
* Model: Remove no_flash_attention — the ability to use flash attention is auto-detected, so this flag is unneeded. Uninstall flash attention to disable it on supported hardware.
This commit is contained in:
@@ -94,7 +94,6 @@ class ModelLoadRequest(BaseModel):
|
||||
default=None,
|
||||
examples=[1.0],
|
||||
)
|
||||
no_flash_attention: Optional[bool] = False
|
||||
# low_mem: Optional[bool] = False
|
||||
cache_mode: Optional[str] = "FP16"
|
||||
chunk_size: Optional[int] = 2048
|
||||
|
||||
Reference in New Issue
Block a user