Add vision support in llama-server (#901)

* server: add support for vision model
webui: add support for vision model

* server : remove hack for extra parallel slot #10187

* llama : fix KV shift for qwen2vl #13870

* add no-context-shift parameter

---------

Co-authored-by: firecoperana <firecoperana>
This commit is contained in:
firecoperana
2025-11-05 08:43:46 +00:00
committed by GitHub
parent 5b38d431ac
commit 15159a87d4
26 changed files with 2456 additions and 729 deletions

View File

@@ -1,6 +1,6 @@
// @ts-expect-error this package does not have typing
import TextLineStream from 'textlinestream';
import { APIMessage, Message } from './types';
import { APIMessage, Message, LlamaCppServerProps, APIMessageContentPart } from './types';
// ponyfill for missing ReadableStream asyncIterator on Safari
import { asyncIterator } from '@sec-ant/readable-stream/ponyfill/asyncIterator';
@@ -57,21 +57,55 @@ export const copyStr = (textToCopy: string) => {
*/
export function normalizeMsgsForAPI(messages: Readonly<Message[]>) {
  return messages.map((msg) => {
    // Non-user messages (and user messages without attachments) keep the
    // plain-string content form expected by the chat completion API.
    if (msg.role !== 'user' || !msg.extra) {
      return {
        role: msg.role,
        content: msg.content,
      } as APIMessage;
    }
    // extra content first, then user text message in the end
    // this allows re-using the same cache prefix for long context
    const contentArr: APIMessageContentPart[] = [];
    for (const extra of msg.extra ?? []) {
      if (extra.type === 'context') {
        contentArr.push({
          type: 'text',
          text: extra.content,
        });
      } else if (extra.type === 'textFile') {
        contentArr.push({
          type: 'text',
          text: `File: ${extra.name}\nContent:\n\n${extra.content}`,
        });
      } else if (extra.type === 'imageFile') {
        contentArr.push({
          type: 'image_url',
          image_url: { url: extra.base64Url },
        });
      } else if (extra.type === 'audioFile') {
        contentArr.push({
          type: 'input_audio',
          input_audio: {
            data: extra.base64Data,
            // only wav and mp3 are accepted by the API; anything non-wav
            // is sent as mp3
            format: /wav/.test(extra.mimeType) ? 'wav' : 'mp3',
          },
        });
      } else {
        throw new Error('Unknown extra type');
      }
    }
    // add user message to the end
    contentArr.push({
      type: 'text',
      text: msg.content,
    });
    return {
      role: msg.role,
      content: contentArr,
    };
  }) as APIMessage[];
}
@@ -137,3 +171,25 @@ export const cleanCurrentUrl = (removeQueryParams: string[]) => {
});
window.history.replaceState({}, '', url.toString());
};
/**
 * Fetch the llama-server properties from the `/props` endpoint.
 * Attaches a bearer token when an API key is given; logs and rethrows
 * any failure so callers can surface the error themselves.
 */
export const getServerProps = async (
  baseUrl: string,
  apiKey?: string
): Promise<LlamaCppServerProps> => {
  const headers: Record<string, string> = {
    'Content-Type': 'application/json',
  };
  if (apiKey) {
    headers.Authorization = `Bearer ${apiKey}`;
  }
  try {
    const response = await fetch(`${baseUrl}/props`, { headers });
    if (!response.ok) {
      throw new Error('Failed to fetch server props');
    }
    return (await response.json()) as LlamaCppServerProps;
  } catch (error) {
    console.error('Error fetching server props:', error);
    throw error;
  }
};