删除废弃代码

This commit is contained in:
RICHARDNAN
2025-10-25 09:52:43 +08:00
parent 573c603656
commit 48fdacedd0
4 changed files with 3 additions and 53 deletions

View File

@@ -37,8 +37,9 @@ conda install -c conda-forge libstdcxx-ng # 安装`GLIBCXX-3.4.32`
apt install zlib1g-dev libtbb-dev libssl-dev libaio-dev libcurl4-openssl-dev
pip3 install numpy==1.26.4 # 适配torch/torch_npu
pip3 install torch==2.5.1 torchvision==0.20.1 torchaudio==2.5.1 --index-url https://download.pytorch.org/whl/cpu
pip3 install packaging ninja transformers==4.43.2 fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
pip3 install packaging ninja fire protobuf attrs decorator cloudpickle ml-dtypes scipy tornado absl-py psutil
pip3 install sqlalchemy
pip3 install transformers==4.57.1 # 注意:运行时要求 transformers 版本为 4.57.1(其他版本未验证)
#pip3 install cpufeature # only for x86
```
@@ -121,7 +122,7 @@ python ktransformers/server/main.py \
--gguf_path /mnt/data/models/DeepSeek-R1-q4km-w8a8 \
--model_name DeepSeekV3ForCausalLM \
--cpu_infer 60 \
--optimize_config_path /home/huawei/ktransformers/ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
--optimize_config_path ./ktransformers/optimize/optimize_rules/DeepSeek-V3-Chat-300IA2-npu-serve.yaml \
--max_new_tokens 128 \
--max_batch_size 4 \
--use_cuda_graph \

View File

@@ -114,16 +114,6 @@ class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
print("########################################")
print("hidden_states is ", hidden_states)
print("########################################")
# with torch.npu.stream(self.call_stream):
# position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size = param
# print("########################################")
# print("position_ids is ", position_ids)
# print("page_idx is ", page_idx)
# print("page_offset is ", page_offset)
# print("block_tables is ", block_tables)
# print("hidden_states is ", hidden_states)
# print("#########################################")
def forward(
self,
@@ -172,27 +162,8 @@ class KNPUDeepseekV3ForCausalLM(DeepseekV3PreTrainedModel):
q_len_raw = None
kv_len_raw = batch.minibatch.d_kv_len_list
bsz_real = None
# if utils._USE_NPU_GRAPH:
# from libgraph_capture import graph_capture_launch_callback
# param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
# graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
# else:
# param = (position_ids, page_idx, page_offset, block_tables, hidden_states, bsz, q_len, hidden_size)
# self.print_callback(param)
# with torch_npu.npu.stream(self.stream):
# print_ex("####: before decode layer...")
for i, decode_layer in enumerate(self.model.layers):
# if not is_prefill:
# if utils._USE_NPU_GRAPH:
# from libgraph_capture import graph_capture_launch_callback
# param = (hidden_states, )
# graph_capture_launch_callback(self.print_callback, param, 1, self.stream.npu_stream)
# else:
# param = (hidden_states, )
# self.print_callback(param)
# attn
residual = hidden_states
hidden_states = decode_layer.input_layernorm(hidden_states)

View File

@@ -232,12 +232,6 @@ class KTransformersInterface(TransformersInterface):
)
self.seq_length = 1
# flat_prev_ids = self.generated_ids.flatten()
# for i in range(min(self.seq_length, flat_input_ids.shape[0]) - 1):
# if flat_input_ids[i] == flat_prev_ids[i]:
# same_prefix += 1
# else:
# break
logger.debug(f"same prefix len: {same_prefix}")
self.cache.remove_suffix(same_prefix)

View File

@@ -106,19 +106,3 @@ class ProfStatItem:
PROF_TIME_STAT = ProfTimeStat()
# j=0
# start_time = PROF_TIME_STAT.record_start_time()
# for i in range(500):
# j+=1
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
# for i in range(500):
# j+=1
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
# for i in range(500):
# j+=1
# PROF_TIME_STAT.add_time_stat(ProfStatKey.ExpertsSummitCurrLayer, start_time, False)
# PROF_TIME_STAT.print_all()