Server: refactor and rename functions (#1151)

* Server: rename functions and refactor code

rename functions

refactor update slots

rename params_base

rename timings

* change

* Revert kv cache name changes

* Revert 2

* fix test build error

---------

Co-authored-by: firecoperana <firecoperana>
Author: firecoperana
Date:   2026-01-18 00:16:57 -06:00
Committed by: GitHub
Parent: 7024fdbc72
Commit: d71a3ec315

38 changed files with 532 additions and 528 deletions
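Every caller-side change in the diff below follows one pattern: helper functions defined in common/ trade their llama_ prefix for a common_ prefix. For downstream code that still calls the old names, a hypothetical compatibility shim (not part of this commit; signatures assumed from the common.h of this era) could bridge the gap:

    // hypothetical shim, not part of this commit: thin wrappers that keep
    // old call sites compiling against the renamed common/ helpers
    #include "common.h"

    static inline void llama_batch_add(struct llama_batch & batch, llama_token id, llama_pos pos,
                                       const std::vector<llama_seq_id> & seq_ids, bool logits) {
        common_batch_add(batch, id, pos, seq_ids, logits);
    }

    static inline void llama_batch_clear(struct llama_batch & batch) {
        common_batch_clear(batch);
    }

    // an overload rather than a #define: core llama.h keeps its own
    // llama_token_to_piece with a different signature, which a macro would clobber
    static inline std::string llama_token_to_piece(const struct llama_context * ctx, llama_token id) {
        return common_token_to_piece(ctx, id);
    }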


@@ -77,7 +77,7 @@ int main(int argc, char ** argv) {
     fprintf(stderr, "\n");
 
     for (auto id : tokens_list) {
-        fprintf(stderr, "%s", llama_token_to_piece(ctx, id).c_str());
+        fprintf(stderr, "%s", common_token_to_piece(ctx, id).c_str());
     }
 
     fflush(stderr);
@@ -89,7 +89,7 @@ int main(int argc, char ** argv) {
 
     // evaluate the initial prompt
     for (size_t i = 0; i < tokens_list.size(); i++) {
-        llama_batch_add(batch, tokens_list[i], i, { 0 }, false);
+        common_batch_add(batch, tokens_list[i], i, { 0 }, false);
     }
 
     // llama_decode will output logits only for the last token of the prompt
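The trailing comment in this hunk refers to the lines that follow in the full file: every prompt token is queued with its logits flag off, and only the last one is switched on before llama_decode(), because only its logits are needed to sample the first generated token. A minimal sketch of that pattern, assuming the batch was filled as above:

    // all prompt tokens were queued with logits = false;
    // request logits for the last prompt token only
    batch.logits[batch.n_tokens - 1] = true;

    if (llama_decode(ctx, batch) != 0) {
        fprintf(stderr, "llama_decode() failed\n");
        return 1;
    }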
@@ -132,14 +132,14 @@ int main(int argc, char ** argv) {
             break;
         }
 
-        LOG_TEE("%s", llama_token_to_piece(ctx, new_token_id).c_str());
+        LOG_TEE("%s", common_token_to_piece(ctx, new_token_id).c_str());
         fflush(stdout);
 
         // prepare the next batch
-        llama_batch_clear(batch);
+        common_batch_clear(batch);
 
         // push this new token for next evaluation
-        llama_batch_add(batch, new_token_id, n_cur, { 0 }, true);
+        common_batch_add(batch, new_token_id, n_cur, { 0 }, true);
 
         n_decode += 1;
     }
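Taken together, the three hunks are a mechanical rename inside the simple example's decode loop. For reference, a self-contained sketch of the whole flow using the renamed helpers; the model path, prompt, n_predict, and the greedy argmax sampling are illustrative, and the surrounding llama.h calls are assumed from this era of the API rather than shown in this commit:

    #include "common.h"
    #include "llama.h"

    #include <cstdio>
    #include <vector>

    int main() {
        llama_backend_init();

        // model path is illustrative
        llama_model * model = llama_load_model_from_file("model.gguf", llama_model_default_params());
        llama_context * ctx = llama_new_context_with_model(model, llama_context_default_params());

        // tokenize the prompt with the renamed helper family
        std::vector<llama_token> tokens_list = common_tokenize(ctx, "Hello my name is", /*add_special=*/true);

        llama_batch batch = llama_batch_init(512, 0, 1);

        // queue the prompt; request logits only for the last token
        for (size_t i = 0; i < tokens_list.size(); i++) {
            common_batch_add(batch, tokens_list[i], i, { 0 }, i == tokens_list.size() - 1);
        }
        if (llama_decode(ctx, batch) != 0) {
            fprintf(stderr, "llama_decode() failed\n");
            return 1;
        }

        const int n_predict = 32;
        int n_cur    = batch.n_tokens;
        int n_decode = 0;

        while (n_decode < n_predict) {
            // greedy argmax over the logits of the last decoded token
            const float * logits = llama_get_logits_ith(ctx, batch.n_tokens - 1);
            llama_token new_token_id = 0;
            for (llama_token t = 1; t < llama_n_vocab(model); t++) {
                if (logits[t] > logits[new_token_id]) new_token_id = t;
            }
            if (llama_token_is_eog(model, new_token_id)) break;

            printf("%s", common_token_to_piece(ctx, new_token_id).c_str());
            fflush(stdout);

            // prepare the next single-token batch
            common_batch_clear(batch);
            common_batch_add(batch, new_token_id, n_cur, { 0 }, true);
            n_cur    += 1;
            n_decode += 1;

            if (llama_decode(ctx, batch) != 0) break;
        }

        printf("\n");

        llama_batch_free(batch);
        llama_free(ctx);
        llama_free_model(model);
        llama_backend_free();
        return 0;
    }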