Update kvc disk path config: read the KV cache disk path from the `kvc2.disk_path` setting instead of a hard-coded path.

This commit is contained in:
ouqingliang
2025-06-30 15:09:35 +00:00
parent aadf31b35d
commit 90cff820cf
4 changed files with 6 additions and 3 deletions

View File

@@ -14,6 +14,7 @@ kvc2:
gpu_only: false # Set to false to enable prefix cache mode (Disk + CPU + GPU KV storage)
utilization_percentage: 1.0
cpu_memory_size_GB: 500 # Amount of CPU memory allocated for KV Cache
disk_path: /mnt/data/kvc # Path to store KV Cache on disk
```
### Step 2: Update Submodules and Recompile

View File

@@ -70,3 +70,4 @@ kvc2:
gpu_only: false
utilization_percentage: 1.0
cpu_memory_size_GB: 500
disk_path: /mnt/data/kvc

View File

@@ -51,7 +51,7 @@ def create_sched_settings(args):
settings.k_cache_on = True
settings.v_cache_on = False
settings.kvc2_root_path = '/mnt/data/persist-kvc'
settings.kvc2_root_path = args.kvc2_disk_path
settings.kvc2_config_path = args.kvc2_config_dir
settings.memory_pool_size_GB = args.cpu_memory_size_GB
settings.evict_count = 40
@@ -104,7 +104,7 @@ def create_sched_settings_qwen2moe(args):
settings.k_cache_on = True
settings.v_cache_on = True
settings.kvc2_root_path = '/mnt/data/persist-kvc'
settings.kvc2_root_path = args.kvc2_disk_path
settings.kvc2_config_path = args.kvc2_config_dir
settings.memory_pool_size_GB = args.cpu_memory_size_GB
settings.evict_count = 40
@@ -158,7 +158,7 @@ def create_sched_settings_qwen3moe(args):
settings.k_cache_on = True
settings.v_cache_on = True
settings.kvc2_root_path = '/mnt/data/persist-kvc'
settings.kvc2_root_path = args.kvc2_disk_path
settings.kvc2_config_path = args.kvc2_config_dir
settings.memory_pool_size_GB = args.cpu_memory_size_GB
settings.evict_count = 40

View File

@@ -203,6 +203,7 @@ class Config(metaclass=Singleton):
self.gpu_memory_size = 2*576*61*self.cache_lens
self.utilization_percentage = 1.0 #cfg["kvc2"]["utilization_percentage"]
self.cpu_memory_size_GB = cfg["kvc2"]["cpu_memory_size_GB"]
self.kvc2_disk_path = cfg["kvc2"]["disk_path"]
# Only 2 prefill tasks are supported
self.max_prefill_batch_size = 2
self.max_decode_batch_size = self.max_batch_size - self.max_prefill_batch_size