mirror of
https://github.com/kvcache-ai/ktransformers.git
synced 2026-04-20 06:18:59 +00:00
删除废弃代码
This commit is contained in:
@@ -37,8 +37,9 @@ conda install -c conda-forge libstdcxx-ng # 安装`GLIBCXX-3.4.32`
|
||||
apt install zlib1g-dev libtbb-dev libssl-dev libaio-dev libcurl4-openssl-dev
|
||||
pip3 install numpy==1.26.4 # 适配torch/torch_npu
|
||||
pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
|
||||
pip3 install packaging ninja transformers==4.43.2 fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
|
||||
pip3 install packaging ninja fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
|
||||
pip3 install sqlalchemy
|
||||
pip3 install transformers==4.57.1 # 注意:运行时要求 transformers 版本为 4.57.1(其他版本未经验证)
|
||||
#pip3 install cpufeature # only for x86
|
||||
```
|
||||
|
||||
@@ -121,7 +122,7 @@ python ktransformers/server/main.py \
|
||||
--gguf_path /mnt/data/models/DeepSeek-R1-q4km-w8a8 \
|
||||
--model_name DeepSeekV3ForCausalLM \
|
||||
--cpu_infer 60 \
|
||||
--optimize_config_path /home/huawei/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
|
||||
--optimize_config_path ./ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
|
||||
--max_new_tokens 128 \
|
||||
--max_batch_size 4 \
|
||||
--use_cuda_graph \
|
||||
|
||||
@@ -114,16 +114,6 @@ class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
|
||||
print("########################################")
|
||||
print("hidden_states is ", hidden_states)
|
||||
print("########################################")
|
||||
# with torch.npu.stream(self.call_stream):
|
||||
# position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size = param
|
||||
# print("########################################")
|
||||
# print("position_ids is ", position_ids)
|
||||
# print("page_idx is ", page_idx)
|
||||
# print("page_offset is ", page_offset)
|
||||
# print("block_tables is ", block_tables)
|
||||
# print("hidden_states is ", hidden_states)
|
||||
# print("#########################################")
|
||||
|
||||
|
||||
def forward(
|
||||
self,
|
||||
@@ -172,27 +162,8 @@ class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
|
||||
q_len_raw = None
|
||||
kv_len_raw = batch.minibatch.d_kv_len_list
|
||||
bsz_real = None
|
||||
# if utils._USE_NPU_GRAPH:
|
||||
# from libgraph_capture import graph_capture_launch_callback
|
||||
# param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
|
||||
# graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
|
||||
# else:
|
||||
# param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
|
||||
# self.print_callback(param)
|
||||
|
||||
|
||||
# with torch_npu.npu.stream(self.stream):
|
||||
# print_ex("####: before decode layer...")
|
||||
for i, decode_layer in enumerate(self.model.layers):
|
||||
# if not is_prefill:
|
||||
# if utils._USE_NPU_GRAPH:
|
||||
# from libgraph_capture import graph_capture_launch_callback
|
||||
# param = (hidden_states, )
|
||||
# graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
|
||||
# else:
|
||||
# param = (hidden_states, )
|
||||
# self.print_callback(param)
|
||||
# attn
|
||||
residual = hidden_states
|
||||
hidden_states = decode_layer.input_layernorm(hidden_states)
|
||||
|
||||
|
||||
@@ -232,12 +232,6 @@ class KTransformersInterface(TransformersInterface):
|
||||
)
|
||||
self.seq_length = 1
|
||||
|
||||
# flat_prev_ids = self.generated_ids.flatten()
|
||||
# for i in range(min(self.seq_length, flat_input_ids.shape[0]) - 1):
|
||||
# if flat_input_ids[i] == flat_prev_ids[i]:
|
||||
# same_prefix += 1
|
||||
# else:
|
||||
# break
|
||||
|
||||
logger.debug(f"same prefix len: {same_prefix}")
|
||||
self.cache.remove_suffix(same_prefix)
|
||||
|
||||
@@ -106,19 +106,3 @@ class ProfStatItem:
|
||||
|
||||
PROF_TIME_STAT = ProfTimeStat()
|
||||
|
||||
|
||||
# j=0
|
||||
# start_time = PROF_TIME_STAT.record_start_time()
|
||||
# for i in range(500):
|
||||
# j+=1
|
||||
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
|
||||
|
||||
# for i in range(500):
|
||||
# j+=1
|
||||
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
|
||||
|
||||
# for i in range(500):
|
||||
# j+=1
|
||||
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
|
||||
|
||||
# PROF_TIME_STAT.print_all()
|
||||
Reference in New Issue
Block a user